diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 82e8c09da..9324ead7e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -62,7 +62,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- - name: Test KFP lib
+ - name: Test KFP v1 lib
run: |
source kind/requirements.env
export PATH=$PATH:/tmp/
@@ -93,7 +93,7 @@ jobs:
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- - name: Test KFP worflow run
+ - name: Test KFP v1 workflow run
timeout-minutes: 120
run: |
source kind/requirements.env
@@ -108,6 +108,8 @@ jobs:
chmod 777 /tmp/kubectl
curl https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o /tmp/mc
chmod +x /tmp/mc
+ export DEPLOY_KUBEFLOW=1
make -C kind setup
- make -C transforms workflow-build
+ make -C kfp/kfp_support_lib test
+ make -C transforms/universal/noop/ workflow-build
make -C transforms/universal/noop workflow-test
diff --git a/.make.defaults b/.make.defaults
index 276fa35c9..2cc29b6e5 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -53,6 +53,9 @@ KIND_CLUSTER_NAME=dataprep
DPK_PYTHON_LIB_DIR=$(REPOROOT)/data-processing-lib/python
DPK_RAY_LIB_DIR=$(REPOROOT)/data-processing-lib/ray
DPK_SPARK_LIB_DIR=$(REPOROOT)/data-processing-lib/spark
+
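+# Set KFPv2=1 (e.g. "make KFPv2=1 <target>") to build and test the KFP v2 flavor of the KFP support libraries and components; the default (0) selects KFP v1.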
+KFPv2?=0
+
#######################################################################################
# Lists all targets and optional help text found in the target.
# Adapted from https://stackoverflow.com/a/65243296/45375
@@ -200,7 +203,7 @@ __check_defined = \
cp -p -R ${LIB_PATH}/README.md ${LIB_NAME}
# Build and image using the local Dockerfile and make the data-processing-lib/python
-# available in the current directory for use by the Dockerfile (i.e. to install the library).
+# available in the current directory for use by the Dockerfile (i.e. to install the library).
.PHONY: .defaults.python-lib-src-image
.defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
@@ -261,8 +264,8 @@ __check_defined = \
# Install all source from the repo for a python runtime transform into an existing venv
.PHONY: .defaults.install-python-lib-src-venv
-.defaults.install-python-lib-src-venv::
- @# Help: Install Python data processing library source into existing venv
+.defaults.install-python-lib-src-venv::
+ @# Help: Install Python data processing library source into existing venv
@echo Installing Python data processing library source to existing venv
@source venv/bin/activate; \
pip install pytest; \
@@ -276,8 +279,8 @@ __check_defined = \
# Install all source from the repo for a ray runtime transform into an existing venv
.PHONY: .defaults.install-ray-lib-src-venv
-.defaults.install-ray-lib-src-venv::
- @# Help: Install Ray and Python data processing library source into existing venv
+.defaults.install-ray-lib-src-venv::
+ @# Help: Install Ray and Python data processing library source into existing venv
@echo Installing Ray and Python data processing library source to existing venv
@source venv/bin/activate; \
pip install pytest; \
@@ -291,11 +294,10 @@ __check_defined = \
.PHONY: .defaults.spark-lib-src-venv
.defaults.spark-lib-src-venv:: .defaults.create-venv .defaults.install-spark-lib-src-venv .defaults.install-local-requirements-venv
-# Install all source from the repo for a spark runtime transform into an existing venv
+# Install the python-based lib BEFORE spark assuming spark depends on the same version as python source.
.PHONY: .defaults.install-spark-lib-src-venv
-.defaults.install-spark-lib-src-venv::
- @# Help: Install Spark and Python data processing library source into existing venv
- @echo ""
+.defaults.install-spark-lib-src-venv::
+ @# Help: Install Spark and Python data processing library source into existing venv
@echo Installing Spark and Python data processing library source to existing venv
@source venv/bin/activate; \
pip install pytest; \
diff --git a/.make.versions b/.make.versions
index 08ce283f0..5ba577f4b 100644
--- a/.make.versions
+++ b/.make.versions
@@ -13,6 +13,7 @@ RELEASE_VERSION_SUFFIX=.dev6
# Data prep lab wheel version
DPK_LIB_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
DPK_LIB_KFP_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
+DPK_LIB_KFP_VERSION_v2=0.2.0$(RELEASE_VERSION_SUFFIX)
# Begin transform versions/tags
BLOCKLIST_VERSION=0.4.2$(RELEASE_VERSION_SUFFIX)
diff --git a/kfp/doc/setup.md b/kfp/doc/setup.md
index e4803e16b..84385ef0f 100644
--- a/kfp/doc/setup.md
+++ b/kfp/doc/setup.md
@@ -66,7 +66,14 @@ choose your OS system, and process according to "(Optional) Install the MinIO Cl
## Installation steps
-You can create a Kind cluster with all required software installed using the following command:
+Before installation, you have to decide which KFP version you want to use.
+In order to use KFP v2, please set the following environment variable:
+
+```shell
+export KFPv2=1
+```
+
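+By default KFP v1 is used (`KFPv2?=0` in `.make.defaults`). To switch back to KFP v1 in the same shell, for example:
+
+```shell
+export KFPv2=0
+```
+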
+Now, you can create a Kind cluster with all required software installed using the following command:
```shell
make setup
diff --git a/kfp/doc/simple_transform_pipeline.md b/kfp/doc/simple_transform_pipeline.md
index 220702cbc..539d3cdf5 100644
--- a/kfp/doc/simple_transform_pipeline.md
+++ b/kfp/doc/simple_transform_pipeline.md
@@ -34,16 +34,16 @@ Note: the project and the explanation below are based on [KFPv1](https://www.kub
* Pipeline wiring - definition of the sequence of invocation (with parameter passing) of participating components
* Additional configuration
-### Imports definition
+### Imports definition
```python
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
+from kfp_support.workflow_support.runtime_utils import (
+ ONE_HOUR_SEC,
+ ONE_WEEK_SEC,
+ ComponentUtils,
)
from kubernetes import client as k8s_client
```
@@ -73,8 +73,8 @@ Ray cluster. For each step we have to define a component that will execute them:
Note: here we are using shared components described in this [document](../kfp_ray_components/README.md) for `create_ray_op`,
`execute_ray_jobs_op` and `cleanup_ray_op`, while `compute_exec_params_op` component is built inline, because it might
differ significantly. For "simple" pipeline cases we can use the
-[default implementation](../kfp_support_lib/src/kfp_support/workflow_support/utils/remote_jobs_utils.py),
-while, for example for exact dedup, we are using a very [specialized one](../transform_workflows/universal/ededup/src/ededup_compute_execution_params.py).
+[default implementation](../kfp_support_lib/src/kfp_support/workflow_support/runtime_utils/remote_jobs_utils.py),
+while, for example for exact dedup, we are using a very [specialized one](../../transforms/universal/ededup/kfp_ray/v2/src/ededup_compute_execution_params.py).
### Input parameters definition
diff --git a/kfp/kfp_ray_components/Dockerfile b/kfp/kfp_ray_components/Dockerfile
index 69f9f0d67..a012640ec 100644
--- a/kfp/kfp_ray_components/Dockerfile
+++ b/kfp/kfp_ray_components/Dockerfile
@@ -1,25 +1,35 @@
FROM docker.io/rayproject/ray:2.9.3-py310
-ARG BUILD_DATE
-ARG GIT_COMMIT
-
-LABEL build-date=$BUILD_DATE
-LABEL git-commit=$GIT_COMMIT
-
# install libraries
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
-# Copy and install data processing libraries
+# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
+COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .
-COPY --chown=ray:users kfp_support_lib/ kfp_support_lib/
-RUN cd kfp_support_lib && pip install --no-cache-dir -e .
+COPY --chown=ray:users python_apiserver_client python_apiserver_client/
+RUN cd python_apiserver_client && pip install --no-cache-dir -e .
+
+COPY --chown=ray:users workflow_support_lib workflow_support_lib/
+RUN cd workflow_support_lib && pip install --no-cache-dir -e .
+
+# overwriting the installation of old versions of pydantic
+RUN pip install --no-cache-dir pydantic==2.6.3
+
# remove credentials-containing file
RUN rm requirements.txt
# components
COPY ./src /pipelines/component/src
+
+# Set environment; declare the build arg so the ENV line below can pick it up
+ARG KFP_v2
+ENV KFP_v2=$KFP_v2
+
+# Put these at the end since they seem to upset the docker cache.
+ARG BUILD_DATE
+ARG GIT_COMMIT
+LABEL build-date=$BUILD_DATE
+LABEL git-commit=$GIT_COMMIT
diff --git a/kfp/kfp_ray_components/Makefile b/kfp/kfp_ray_components/Makefile
index a7743d2f3..e8a8c3adb 100644
--- a/kfp/kfp_ray_components/Makefile
+++ b/kfp/kfp_ray_components/Makefile
@@ -2,26 +2,39 @@
# # know where they are running from.
REPOROOT=../..
+# Include the common rules.
+# Use "make help" to see them.
+include $(REPOROOT)/.make.defaults
+
IGNORE := $(shell bash -c "sed -n /=/p ${REPOROOT}/kfp/requirements.env | sed 's/=/:=/' | sed 's/^/export /' > makeenv")
include makeenv
-DOCKER_FILE=Dockerfile
+
+ifeq ($(KFPv2), 1)
+DOCKER_IMAGE_NAME=kfp-data-processing_v2
+DOCKER_IMAGE_VERSION=${KFP_DOCKER_VERSION_v2}
+WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support
+else
DOCKER_IMAGE_NAME=kfp-data-processing
DOCKER_IMAGE_VERSION=${KFP_DOCKER_VERSION}
+WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support
+endif
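+# For example, "make KFPv2=1 image" builds the kfp-data-processing_v2 image with the kfp_v2_workflow_support library.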
-# Include the common rules.
-# Use "make help" to see them.
-include $(REPOROOT)/.make.defaults
+
+#DOCKER_IMG=${DOCKER_HOSTNAME}/${DOCKER_NAMESPACE}/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_VERSION}
+DOCKER_IMG=$(DOCKER_LOCAL_IMAGE)
.PHONY: .lib-src-image
.lib-src-image::
$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray
$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python
- $(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib LIB_NAME=kfp_support_lib
+ $(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib/python_apiserver_client LIB_NAME=python_apiserver_client
+ $(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib/$(WORKFLOW_SUPPORT_LIB) LIB_NAME=workflow_support_lib
$(MAKE) .defaults.image
rm -rf data-processing-lib-ray
rm -rf data-processing-lib-python
- rm -rf kfp_support_lib
+ rm -rf python_apiserver_client
+ rm -rf workflow_support_lib
.PHONY: image
image: Dockerfile requirements.txt
@@ -34,11 +47,12 @@ set-versions:: reconcile-requirements
.PHONY: reconcile-requirements
reconcile-requirements:
@# Help: Update yaml files to build images tagged as version $(KFP_DOCKER_VERSION)
- sed -i.back "s/kfp-data-processing:[0-9].*/kfp-data-processing:${KFP_DOCKER_VERSION}/" createRayClusterComponent.yaml
- sed -i.back "s/kfp-data-processing:[0-9].*/kfp-data-processing:${KFP_DOCKER_VERSION}/" deleteRayClusterComponent.yaml
- sed -i.back "s/kfp-data-processing:[0-9].*/kfp-data-processing:${KFP_DOCKER_VERSION}/" executeRayJobComponent.yaml
- sed -i.back "s/kfp-data-processing:[0-9].*/kfp-data-processing:${KFP_DOCKER_VERSION}/" executeRayJobComponent_multi_s3.yaml
- sed -i.back "s/kfp-data-processing:[0-9].*/kfp-data-processing:${KFP_DOCKER_VERSION}/" executeSubWorkflowComponent.yaml
+ sed -i.back "s/kfp-data-processing.*:[0-9].*/$(DOCKER_IMAGE_NAME):${KFP_DOCKER_VERSION}/" createRayClusterComponent.yaml
+ sed -i.back "s/kfp-data-processing.*:[0-9].*/$(DOCKER_IMAGE_NAME):${KFP_DOCKER_VERSION}/" deleteRayClusterComponent.yaml
+ sed -i.back "s/kfp-data-processing.*:[0-9].*/$(DOCKER_IMAGE_NAME):${KFP_DOCKER_VERSION}/" executeRayJobComponent.yaml
+ sed -i.back "s/kfp-data-processing.*:[0-9].*/$(DOCKER_IMAGE_NAME):${KFP_DOCKER_VERSION}/" executeRayJobComponent_multi_s3.yaml
+ # TODO remove it for KFPv2
+ sed -i.back "s/kfp-data-processing.*:[0-9].*/$(DOCKER_IMAGE_NAME):${KFP_DOCKER_VERSION}/" executeSubWorkflowComponent.yaml
.PHONY: load-image
load-image:
diff --git a/kfp/kfp_ray_components/executeRayJobComponent.yaml b/kfp/kfp_ray_components/executeRayJobComponent.yaml
index fd04cfbe6..6f402affa 100644
--- a/kfp/kfp_ray_components/executeRayJobComponent.yaml
+++ b/kfp/kfp_ray_components/executeRayJobComponent.yaml
@@ -6,7 +6,7 @@ inputs:
- { name: run_id, type: String, description: "The KFP Run ID" }
- { name: additional_params, type: String, description: "additional parameters" }
# The component converts the dictionary to json string
- - { name: exec_params, type: dict, description: "job parameters" }
+ - { name: exec_params, type: JsonObject, description: "job parameters" }
- { name: exec_script_name, type: String, description: "transform script name" }
- { name: server_url, type: String, default: "", description: "url of api server" }
diff --git a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
index 9f17afed4..fe0700b33 100644
--- a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
+++ b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml
@@ -8,7 +8,7 @@ inputs:
- { name: server_url, type: String, default: "", description: "url of api server" }
- { name: prefix, type: String, default: "", description: "prefix for extra credentials" }
# The component converts the dictionary to json string
- - { name: exec_params, type: dict, description: "job parameters" }
+ - { name: exec_params, type: JsonObject, description: "job parameters" }
- { name: additional_params, type: String, description: "additional parameters" }
implementation:
diff --git a/kfp/kfp_ray_components/src/create_ray_cluster.py b/kfp/kfp_ray_components/src/create_ray_cluster.py
index 190acf80b..a2b16d577 100644
--- a/kfp/kfp_ray_components/src/create_ray_cluster.py
+++ b/kfp/kfp_ray_components/src/create_ray_cluster.py
@@ -9,10 +9,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
-
import sys
-
-from kfp_support.workflow_support.utils import KFPUtils, RayRemoteJobs
+from workflow_support.runtime_utils import KFPUtils, RayRemoteJobs
def start_ray_cluster(
diff --git a/kfp/kfp_ray_components/src/delete_ray_cluster.py b/kfp/kfp_ray_components/src/delete_ray_cluster.py
index fc5016b87..55cf2f34b 100644
--- a/kfp/kfp_ray_components/src/delete_ray_cluster.py
+++ b/kfp/kfp_ray_components/src/delete_ray_cluster.py
@@ -9,11 +9,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
-
import sys
-
-from kfp_support.workflow_support.utils import KFPUtils, RayRemoteJobs
-
+from workflow_support.runtime_utils import KFPUtils, RayRemoteJobs
# Cleans and shutdowns the Ray cluster
def cleanup_ray_cluster(
diff --git a/kfp/kfp_ray_components/src/execute_ray_job.py b/kfp/kfp_ray_components/src/execute_ray_job.py
index 74d42df1a..173ccb06a 100644
--- a/kfp/kfp_ray_components/src/execute_ray_job.py
+++ b/kfp/kfp_ray_components/src/execute_ray_job.py
@@ -9,9 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
-
-from kfp_support.workflow_support.utils import KFPUtils, execute_ray_jobs
-
+from workflow_support.runtime_utils import KFPUtils, execute_ray_jobs
if __name__ == "__main__":
import argparse
diff --git a/kfp/kfp_ray_components/src/execute_ray_job_multi_s3.py b/kfp/kfp_ray_components/src/execute_ray_job_multi_s3.py
index 1e58a5e66..b7b5d9863 100644
--- a/kfp/kfp_ray_components/src/execute_ray_job_multi_s3.py
+++ b/kfp/kfp_ray_components/src/execute_ray_job_multi_s3.py
@@ -10,8 +10,7 @@
# limitations under the License.
################################################################################
-from kfp_support.workflow_support.utils import KFPUtils, execute_ray_jobs
-
+from workflow_support.runtime_utils import KFPUtils, execute_ray_jobs
if __name__ == "__main__":
import argparse
diff --git a/kfp/kfp_ray_components/src/subworkflow.py b/kfp/kfp_ray_components/src/subworkflow.py
index 52f8c0da4..f15877d86 100644
--- a/kfp/kfp_ray_components/src/subworkflow.py
+++ b/kfp/kfp_ray_components/src/subworkflow.py
@@ -1,9 +1,11 @@
import sys
-from data_processing.utils.params_utils import ParamsUtils
-from kfp_support.workflow_support.utils import KFPUtils, PipelinesUtils
+from workflow_support.runtime_utils import KFPUtils
+from workflow_support.pipeline_utils import PipelinesUtils
+from data_processing.utils import ParamsUtils
+
def invoke_sub_workflow(
name: str, # workflow name
prefix: str, # workflow arguments prefix
diff --git a/kfp/kfp_support_lib/Makefile b/kfp/kfp_support_lib/Makefile
index aad549c7a..31f702221 100644
--- a/kfp/kfp_support_lib/Makefile
+++ b/kfp/kfp_support_lib/Makefile
@@ -1,54 +1,48 @@
-# Define the root of the local git clone for the common rules to be able
-# know where they are running from.
-REPOROOT=../..
-include ${REPOROOT}/.make.versions
-include ${REPOROOT}/kfp/requirements.env
+#################################################################################################################
+#
+# This is the top level makefile, which is intended to be able to process a common set of rules on all
+# sub-projects underneath this directory. Currently, the common/standardized set of rules are as follows
+# and supported by .make.defaults
+#
+# setup:
+# clean:
+# build:
+# test:
+#
+# When finally getting to a makefile that requires a rule implementation, for example to test the build,
+# that makefile should override/implement the rule to meet its needs. Such a rule may continue to recurse
+# using "$(MAKE) -recurse", for example "$(MAKE) test-recurse".
+#
+# Each rule is called recursively on sub-directories and if a similar inclusion is done in the sub-Makefiles,
+# the rules will be applied/executed recursively in their sub-directories.
+#
+#################################################################################################################
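+
+# For example, "make test" here recursively runs the "test" rule in each sub-library
+# (python_apiserver_client, kfp_v1_workflow_support, kfp_v2_workflow_support), which is
+# what the CI step "make -C kfp/kfp_support_lib test" does.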
-# Include the common rules.
-# Use "make help" to see them.
-include ../../.make.defaults
+REPOROOT=../..
-# Command to run pytest
-PYTHON_VERSION=$(shell $(PYTHON) --version)
-VENV_ACTIVATE=venv/bin/activate
+# Get some common rules for the whole repo
+include $(REPOROOT)/.make.defaults
-DEPLOY_KUBEFLOW ?= 0
+########## ########## ########## ########## ########## ########## ########## ##########
+# Global rules that are generally to be implemented in the sub-directories and can
+# be overridden there (the double colon on the rule makes the overridable).
clean::
- @# Help: Clean up the distribution build and the venv
- rm -r dist venv || true
- rm -rf src/*egg-info || true
- rm -rf *.back || true
-
-
-.check-env:: .check_python_version
- @echo "Checks passed"
-
-set-versions:: .check-env
- $(MAKE) TOML_VERSION=$(DPK_LIB_KFP_VERSION) .defaults.update-toml
- sed -i.back 's/kfp==[0-9].*/kfp==${KFP}",/' pyproject.toml
- sed -i.back 's/ray==[0-9].*/ray==${RAY}",/' pyproject.toml
-
-build-dist:: set-versions .defaults.build-dist
-
-publish:: publish-dist
+ @# Help: Recursively $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
-publish-dist:: .check-env .defaults.publish-dist
+setup::
+ @# Help: Recursively $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
-build:: build-dist
+build::
+ @# Help: Recursively $@ in all subdirs
+ $(MAKE) RULE=$@ .recurse
-venv:: pyproject.toml .check-env .defaults.venv
- $(MAKE) .defaults.install-python-lib-src-venv
- . ${VENV_ACTIVATE}; \
- pip install -e .; \
- pip install pytest pytest-cov;
- @# Help: Create the virtual environment using pyproject.toml
+test::
+ @# Help: Recursively $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
-test:: venv
- @# Help: Use the already-built virtual environment to run pytest on the test directory.
- . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) api_params_test.py;
-ifeq ($(DEPLOY_KUBEFLOW),1)
- . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) kuberay_api_test.py;
- . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) ray_remote_jobs_test.py;
- . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) pipeline_utils_test.py;
-endif
+image::
+ @# Help: Recursively $@ in all subdirs
+ @$(MAKE) RULE=$@ .recurse
diff --git a/kfp/kfp_support_lib/README.md b/kfp/kfp_support_lib/README.md
index 86f3f4360..440fc16c3 100644
--- a/kfp/kfp_support_lib/README.md
+++ b/kfp/kfp_support_lib/README.md
@@ -1,10 +1,13 @@
# KFP support library
This provides support for implementing KFP pipelines automating transform's execution.
-It comprises 2 main modules
+It comprises 3 main modules
-* [api server client](src/kfp_support/api_server_client/README.md)
-* [workflow support](src/kfp_support/workflow_support/README.md)
+* [api server client](python_apiserver_client/README.md)
+* [kfp_v1_workflow_support](kfp_v1_workflow_support/README.md)
+* [kfp_v2_workflow_support](kfp_v2_workflow_support/README.md)
+
+Depending on the KFP version being used, either `kfp_v1_workflow_support` or `kfp_v2_workflow_support` should be installed.
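+
+For example, to install the library matching your KFP deployment (package names as defined in the corresponding `pyproject.toml` files):
+
+```shell
+# for KFP v1
+pip install data_prep_toolkit_kfp_v1
+# or, for KFP v2
+pip install data_prep_toolkit_kfp_v2
+```
+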
## Development
diff --git a/kfp/kfp_support_lib/doc/kfp_support_library.md b/kfp/kfp_support_lib/doc/kfp_support_library.md
index 0ae5e9d1c..fc571eb81 100644
--- a/kfp/kfp_support_lib/doc/kfp_support_library.md
+++ b/kfp/kfp_support_lib/doc/kfp_support_library.md
@@ -2,7 +2,7 @@
This library is aimed to simplify transform pipelines implementations and consists of 2 main parts:
-* [API Server Client](../src/kfp_support/api_server_client/README.md)
+* [API Server Client](../python_apiserver_client/README.md)
* [workflow support](../src/kfp_support/workflow_support/README.md)
See also how this library is used for [kfp components](../../kfp_ray_components/README.md) implementation
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/Makefile b/kfp/kfp_support_lib/kfp_v1_workflow_support/Makefile
new file mode 100644
index 000000000..9cebae629
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/Makefile
@@ -0,0 +1,82 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../..
+include ${REPOROOT}/.make.versions
+include ${REPOROOT}/kfp/requirements.env
+
+# Include the common rules.
+# Use "make help" to see them.
+include ${REPOROOT}/.make.defaults
+
+# Command to run pytest
+PYTHON_VERSION=$(shell $(PYTHON) --version)
+VENV_ACTIVATE=venv/bin/activate
+
+DEPLOY_KUBEFLOW ?= 0
+
+clean::
+ @# Help: Clean up the distribution build and the venv
+ rm -r dist venv || true
+ rm -rf src/*egg-info || true
+ rm -rf *.back || true
+
+
+.check-env:: .check_python_version
+ @echo "Checks passed"
+
+set-versions:: .check-env
+ @# Help: Copy the Makefile distribution version into the pyproject.toml
+ sed -i.back 's/^version[ ]*=.*/version = "'${DPK_LIB_KFP_VERSION}'"/' pyproject.toml
+ sed -i.back 's/data_prep_toolkit_ray==[0-9].*/data_prep_toolkit_ray==${DPK_LIB_VERSION}",/' pyproject.toml
+ sed -i.back 's/kfp==[0-9].*/kfp==${KFP_v1}",/' pyproject.toml
+ sed -i.back 's/ray==[0-9].*/ray==${RAY}",/' pyproject.toml
+
+build:: set-versions venv
+ifeq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make build` in upper directories and KFPv2==1
+ echo "Skipping build as KFPv2 is defined"
+else
+ @# Help: Build the distribution for publishing to a pypi
+ rm -r dist || true
+ rm -rf src/*egg-info || true
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m build
+endif
+
+publish:: .check-env
+ @# Help: Publish the wheel to testpypi
+ if [ -d "dist" ]; then rm -r dist; fi
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m twine check dist/*
+ ${PYTHON} -m twine upload --verbose --non-interactive dist/*
+
+venv:: pyproject.toml .check-env
+ifeq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make venv` in upper directories and KFPv2==1
+ echo "Skipping as KFPv2 is defined"
+else
+ @# Help: Create the virtual environment using pyproject.toml
+ rm -rf venv
+ $(PYTHON) -m venv venv
+ . ${VENV_ACTIVATE}; \
+ cd ../../../data-processing-lib/python && make set-versions && cd -; \
+ pip install -e ../../../data-processing-lib/python; \
+ cd ../../../data-processing-lib/ray && make set-versions && cd -; \
+ pip install -e ../../../data-processing-lib/ray; \
+ cd ../python_apiserver_client && make set-versions && cd -; \
+ pip install -e ../python_apiserver_client; \
+ pip install -e .; \
+ pip install pytest pytest-cov
+endif
+
+test:: venv
+ifeq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make test` in upper directories and KFPv2==1
+ echo "Skipping test as KFPv2 is defined"
+else
+ @# Help: Use the already-built virtual environment to run pytest on the test directory.
+ifeq ($(DEPLOY_KUBEFLOW),1)
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) ray_remote_jobs_test.py;
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) pipeline_utils_test.py;
+endif
+endif
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/README.md b/kfp/kfp_support_lib/kfp_v1_workflow_support/README.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/kfp/kfp_support_lib/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
similarity index 86%
rename from kfp/kfp_support_lib/pyproject.toml
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
index 7816cc5ba..679f7ed08 100644
--- a/kfp/kfp_support_lib/pyproject.toml
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
@@ -1,7 +1,7 @@
[project]
-name = "data_prep_toolkit_kfp"
+name = "data_prep_toolkit_kfp_v1"
version = "0.2.0.dev6"
-requires-python = ">=3.10"
+requires-python = ">=3.10,<3.12"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
@@ -15,7 +15,8 @@ dependencies = [
"kfp==1.8.22",
"ray==2.9.3",
"requests",
- "data-prep-toolkit==0.2.0.dev6",
+ "data_prep_toolkit_ray==0.2.0.dev6",
+ "python_apiserver_client==0.1.0",
]
[build-system]
@@ -37,7 +38,8 @@ dev = [
package_dir = ["src"]
[options.packages.find]
-where = ["src/kfp_support"]
+where = ["src/workflow_support"]
+
[tool.pytest.ini_options]
addopts = "--cov --cov-report term-missing --cov-fail-under 10"
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/__init__.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/__init__.py
new file mode 100644
index 000000000..6b99a6be1
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/__init__.py
@@ -0,0 +1,6 @@
+from workflow_support.compile_utils.component import (
+ ONE_HOUR_SEC,
+ ONE_DAY_SEC,
+ ONE_WEEK_SEC,
+ ComponentUtils
+)
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/components_utils.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/component.py
similarity index 63%
rename from kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/components_utils.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/component.py
index 46e55024d..460b20e23 100644
--- a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/components_utils.py
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/compile_utils/component.py
@@ -92,50 +92,3 @@ def add_secret_volume_to_com_function(component: dsl.ContainerOp, secretName: st
secret=k8s_client.V1SecretVolumeSource(secret_name=secretName, optional=optional),
)
component.add_pvolumes({mountPoint: vol})
-
- @staticmethod
- def default_compute_execution_params(
- worker_options: str, # ray worker configuration
- actor_options: str, # cpus per actor
- ) -> str:
- """
- This is the most simplistic transform execution parameters computation
- :param worker_options: configuration of ray workers
- :param actor_options: actor request requirements
- :return: number of actors
- """
- import sys
-
- from data_processing.utils import GB, get_logger
- from kfp_support.workflow_support.utils import KFPUtils
-
- logger = get_logger(__name__)
-
- # convert input
- w_options = KFPUtils.load_from_json(worker_options.replace("'", '"'))
- a_options = KFPUtils.load_from_json(actor_options.replace("'", '"'))
- # Compute available cluster resources
- cluster_cpu = w_options["replicas"] * w_options["cpu"]
- cluster_mem = w_options["replicas"] * w_options["memory"]
- cluster_gpu = w_options["replicas"] * w_options.get("gpu", 0.0)
- logger.info(f"Cluster available CPUs {cluster_cpu}, Memory {cluster_mem}, GPUs {cluster_gpu}")
- # compute number of actors
- n_actors_cpu = int(cluster_cpu * 0.85 / a_options.get("num_cpus", 0.5))
- n_actors_memory = int(cluster_mem * 0.85 / (a_options.get("memory", GB) / GB))
- n_actors = min(n_actors_cpu, n_actors_memory)
- # Check if we need gpu calculations as well
- actor_gpu = a_options.get("num_gpus", 0)
- if actor_gpu > 0:
- n_actors_gpu = int(cluster_gpu / actor_gpu)
- n_actors = min(n_actors, n_actors_gpu)
- logger.info(f"Number of actors - {n_actors}")
- if n_actors < 1:
- logger.warning(
- f"Not enough cpu/gpu/memory to run transform, "
- f"required cpu {a_options.get('num_cpus', .5)}, available {cluster_cpu}, "
- f"required memory {a_options.get('memory', 1)}, available {cluster_mem}, "
- f"required cpu {actor_gpu}, available {cluster_gpu}"
- )
- sys.exit(1)
-
- return str(n_actors)
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/__init__.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/__init__.py
new file mode 100644
index 000000000..0e80d97a2
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/__init__.py
@@ -0,0 +1 @@
+from workflow_support.pipeline_utils.pipeline_utils import PipelinesUtils
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/pipeline_utils.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/pipeline_utils.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/pipeline_utils.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/pipeline_utils.py
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/pipelines_tests_utils.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/pipelines_tests_utils.py
new file mode 100644
index 000000000..183331a2b
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/pipeline_utils/pipelines_tests_utils.py
@@ -0,0 +1,75 @@
+import os
+import sys
+
+from data_processing.utils import get_logger, str2bool
+
+from workflow_support.pipeline_utils import PipelinesUtils
+
+
+logger = get_logger(__name__)
+
+
+def run_test(pipeline_package_path: str, endpoint: str = "http://localhost:8080/", overwrite: bool = True):
+ """
+ Upload and run a single pipeline
+
+ :param pipeline_package_path: Local path to the pipeline package.
+ :param endpoint: endpoint to kfp service.
+ :return: the pipeline name as it appears in the KFP GUI.
+ """
+ tmout: int = 800
+ wait: int = 60
+ file_name = os.path.basename(pipeline_package_path)
+ pipeline_name = os.path.splitext(file_name)[0]
+ utils = PipelinesUtils(host=endpoint)
+ pipeline = utils.upload_pipeline(
+ pipeline_package_path=pipeline_package_path,
+ pipeline_name=pipeline_name,
+ overwrite=overwrite,
+ )
+ if pipeline is None:
+ return None
+ experiment = utils.get_experiment_by_name()
+ run_id = utils.start_pipeline(pipeline, experiment, params=[])
+ status, error = utils.wait_pipeline_completion(run_id=run_id, timeout=tmout, wait=wait)
+ if status.lower() not in ["succeeded", "completed"]:
+ # Execution failed
+ logger.warning(f"Pipeline {pipeline_name} failed with error {error} and status {status}")
+ return None
+ logger.info(f"Pipeline {pipeline_name} successfully completed")
+ return pipeline_name
+
+
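+# Example invocation (sketch; the flags are defined by the argparse parser below):
+#   python pipelines_tests_utils.py -c sanity-test -p <compiled_pipeline>.yaml -e http://localhost:8080/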
+if __name__ == "__main__":
+ import argparse
+
+ parser = argparse.ArgumentParser(description="Run sanity test")
+ parser.add_argument("-c", "--command", type=str, choices=["upload", "sanity-test"])
+ parser.add_argument("-e", "--endpoint", type=str, default="http://localhost:8080/")
+ parser.add_argument("-p", "--pipeline_package_path", type=str, default="")
+ parser.add_argument("-o", "--overwrite", type=str, default="True")
+
+ args = parser.parse_args()
+ match args.command:
+ case "upload":
+ file_name = os.path.basename(args.pipeline_package_path)
+ pipeline_name = os.path.splitext(file_name)[0]
+ utils = PipelinesUtils(host=args.endpoint)
+ pipeline = utils.upload_pipeline(
+ pipeline_package_path=args.pipeline_package_path,
+ pipeline_name=pipeline_name,
+ overwrite=str2bool(args.overwrite),
+ )
+ if pipeline is None:
+ sys.exit(1)
+ case "sanity-test":
+ run = run_test(
+ endpoint=args.endpoint,
+ pipeline_package_path=args.pipeline_package_path,
+ overwrite=str2bool(args.overwrite),
+ )
+ if run is None:
+ sys.exit(1)
+ case _:
+ logger.warning("Unsupported command")
+ sys.exit(1)
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/__init__.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/__init__.py
new file mode 100644
index 000000000..8d2cdd648
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/__init__.py
@@ -0,0 +1,2 @@
+from workflow_support.runtime_utils.kfp_utils import KFPUtils
+from workflow_support.runtime_utils.remote_jobs_utils import RayRemoteJobs, execute_ray_jobs
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/kfp_utils.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py
similarity index 66%
rename from kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/kfp_utils.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py
index ef00b0e92..feb081dd2 100644
--- a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/kfp_utils.py
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py
@@ -111,3 +111,50 @@ def load_from_json(js: str) -> dict[str, Any]:
except Exception as e:
logger.warning(f"Failed to load parameters {js} with error {e}")
sys.exit(1)
+
+ @staticmethod
+ def default_compute_execution_params(
+ worker_options: str, # ray worker configuration
+ actor_options: str, # cpus per actor
+ ) -> str:
+ """
+ This is the most simplistic transform execution parameters computation
+ :param worker_options: configuration of ray workers
+ :param actor_options: actor request requirements
+ :return: number of actors
+ """
+ import sys
+
+ from data_processing.utils import GB, get_logger
+ from workflow_support.runtime_utils import KFPUtils
+
+ logger = get_logger(__name__)
+
+ # convert input
+ w_options = KFPUtils.load_from_json(worker_options.replace("'", '"'))
+ a_options = KFPUtils.load_from_json(actor_options.replace("'", '"'))
+ # Compute available cluster resources
+ cluster_cpu = w_options["replicas"] * w_options["cpu"]
+ cluster_mem = w_options["replicas"] * w_options["memory"]
+ cluster_gpu = w_options["replicas"] * w_options.get("gpu", 0.0)
+ logger.info(f"Cluster available CPUs {cluster_cpu}, Memory {cluster_mem}, GPUs {cluster_gpu}")
+ # compute number of actors
+ n_actors_cpu = int(cluster_cpu * 0.85 / a_options.get("num_cpus", 0.5))
+ n_actors_memory = int(cluster_mem * 0.85 / (a_options.get("memory", GB) / GB))
+ n_actors = min(n_actors_cpu, n_actors_memory)
+ # Check if we need gpu calculations as well
+ actor_gpu = a_options.get("num_gpus", 0)
+ if actor_gpu > 0:
+ n_actors_gpu = int(cluster_gpu / actor_gpu)
+ n_actors = min(n_actors, n_actors_gpu)
+ logger.info(f"Number of actors - {n_actors}")
+ if n_actors < 1:
+ logger.warning(
+ f"Not enough cpu/gpu/memory to run transform, "
+ f"required cpu {a_options.get('num_cpus', .5)}, available {cluster_cpu}, "
+ f"required memory {a_options.get('memory', 1)}, available {cluster_mem}, "
+ f"required cpu {actor_gpu}, available {cluster_gpu}"
+ )
+ sys.exit(1)
+
+ return str(n_actors)
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/remote_jobs_utils.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py
similarity index 99%
rename from kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/remote_jobs_utils.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py
index 40b26c7a1..0b20b28c4 100644
--- a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/remote_jobs_utils.py
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py
@@ -17,8 +17,8 @@
from data_processing.data_access import DataAccess, DataAccessFactory
from data_processing.utils import ParamsUtils, get_logger
-from kfp_support.api_server_client import KubeRayAPIs
-from kfp_support.api_server_client.params import (
+from python_apiserver_client import KubeRayAPIs
+from python_apiserver_client.params import (
DEFAULT_HEAD_START_PARAMS,
DEFAULT_WORKER_START_PARAMS,
Cluster,
@@ -30,7 +30,7 @@
environment_variables_decoder,
volume_decoder,
)
-from kfp_support.workflow_support.utils import KFPUtils
+from workflow_support.runtime_utils import KFPUtils
from ray.job_submission import JobStatus
diff --git a/kfp/kfp_support_lib/test/configmaps.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/test/configmaps.py
similarity index 100%
rename from kfp/kfp_support_lib/test/configmaps.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/test/configmaps.py
diff --git a/kfp/kfp_support_lib/test/pipeline_utils_test.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/test/pipeline_utils_test.py
similarity index 90%
rename from kfp/kfp_support_lib/test/pipeline_utils_test.py
rename to kfp/kfp_support_lib/kfp_v1_workflow_support/test/pipeline_utils_test.py
index 2630552ee..200bf1676 100644
--- a/kfp/kfp_support_lib/test/pipeline_utils_test.py
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/test/pipeline_utils_test.py
@@ -10,14 +10,15 @@
# limitations under the License.
################################################################################
-from kfp_support.workflow_support.utils import PipelinesUtils
+from workflow_support.pipeline_utils import PipelinesUtils
+server_url = "http://localhost:8080/"
def test_pipelines():
"""
Test pipelines utils
"""
- utils = PipelinesUtils(host="http://localhost:8080/kfp")
+ utils = PipelinesUtils(host=server_url)
# get pipeline by name
pipeline = utils.get_pipeline_by_name("[Tutorial] Data passing in python components")
assert pipeline is not None
diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/test/ray_remote_jobs_test.py b/kfp/kfp_support_lib/kfp_v1_workflow_support/test/ray_remote_jobs_test.py
new file mode 100644
index 000000000..ab25573b0
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/test/ray_remote_jobs_test.py
@@ -0,0 +1,91 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from configmaps import ConfigmapsManager
+from python_apiserver_client.params import ConfigMapVolume
+from workflow_support.runtime_utils import RayRemoteJobs
+
+server_url = "http://localhost:8080/ray/"
+
+def test_ray_remote_jobs():
+ """
+ Test the full cycle of job submission
+ :return:
+ """
+ # This shows how to create volumes dictionary
+ volumes = [
+ ConfigMapVolume(
+ name="code-sample",
+ mount_path="/home/ray/samples",
+ source="ray-job-code-sample",
+ items={"sample_code.py": "sample_code.py"},
+ )
+ ]
+ dct_volumes = {"volumes": [v.to_dict() for v in volumes]}
+
+ head_node = {
+ "cpu": 2,
+ "memory": 4,
+ "image": "rayproject/ray:2.9.3-py310",
+ # Ray start params, just to show
+ "ray_start_params": {"metrics-export-port": "8080", "num-cpus": "0", "dashboard-host": "0.0.0.0"},
+ "image_pull_policy": "Always",
+ } | dct_volumes
+
+ worker_node = {
+ "cpu": 2,
+ "memory": 4,
+ "image": "rayproject/ray:2.9.3-py310",
+ "replicas": 1,
+ "min_replicas": 1,
+ "max_replicas": 1,
+ "image_pull_policy": "Always",
+ } | dct_volumes
+
+ # Create configmap for testing
+ cm_manager = ConfigmapsManager()
+ cm_manager.delete_code_map()
+ cm_manager.create_code_map()
+
+ # create cluster
+ remote_jobs = RayRemoteJobs(server_url=server_url)
+ status, error = remote_jobs.create_ray_cluster(
+ name="job-test", namespace="default", head_node=head_node, worker_nodes=[worker_node]
+ )
+ print(f"Created cluster - status: {status}, error: {error}")
+ assert status == 200
+ assert error is None
+ # submitting ray job
+ runtime_env = """
+ pip:
+ - requests==2.26.0
+ - pendulum==2.1.2
+ env_vars:
+ counter_name: test_counter
+ """
+ status, error, submission = remote_jobs.submit_job(
+ name="job-test",
+ namespace="default",
+ request={},
+ runtime_env=runtime_env,
+ executor="/home/ray/samples/sample_code.py",
+ )
+ print(f"submit job - status: {status}, error: {error}, submission id {submission}")
+ assert status == 200
+ assert error is None
+ # print execution log
+ remote_jobs.follow_execution(name="job-test", namespace="default", submission_id=submission, print_timeout=20)
+ # cleanup
+ status, error = remote_jobs.delete_ray_cluster(name="job-test", namespace="default")
+ print(f"Deleted cluster - status: {status}, error: {error}")
+ assert status == 200
+ assert error is None
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/Makefile b/kfp/kfp_support_lib/kfp_v2_workflow_support/Makefile
new file mode 100644
index 000000000..30921f37f
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/Makefile
@@ -0,0 +1,83 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../..
+include ${REPOROOT}/.make.versions
+include ${REPOROOT}/kfp/requirements.env
+
+# Include the common rules.
+# Use "make help" to see them.
+include ${REPOROOT}/.make.defaults
+
+# Command to run pytest
+PYTHON_VERSION=$(shell $(PYTHON) --version)
+VENV_ACTIVATE=venv/bin/activate
+
+DEPLOY_KUBEFLOW ?= 0
+
+clean::
+ @# Help: Clean up the distribution build and the venv
+ rm -r dist venv || true
+ rm -rf src/*egg-info || true
+ rm -rf *.back || true
+
+
+.check-env:: .check_python_version
+ @echo "Checks passed"
+
+set-versions:: .check-env
+ @# Help: Copy the Makefile distribution version into the pyproject.toml
+ sed -i.back 's/^version[ ]*=.*/version = "'${DPK_LIB_KFP_VERSION_v2}'"/' pyproject.toml
+ sed -i.back 's/data_prep_toolkit_ray==[0-9].*/data_prep_toolkit_ray==${DPK_LIB_VERSION}",/' pyproject.toml
+ sed -i.back 's/kfp==[0-9].*/kfp==${KFP_v2}",/' pyproject.toml
+ sed -i.back 's/ray==[0-9].*/ray==${RAY}",/' pyproject.toml
+
+build:: set-versions venv
+ifneq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make build` in upper directories and KFPv2 is not set
+ echo "Skipping build as KFPv2 is not defined"
+else
+ @# Help: Build the distribution for publishing to a pypi
+ rm -r dist || true
+ rm -rf src/*egg-info || true
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m build
+endif
+
+publish:: .check-env
+ @# Help: Publish the wheel to testpypi
+ if [ -d "dist" ]; then rm -r dist; fi
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m twine check dist/*
+ ${PYTHON} -m twine upload --verbose --non-interactive dist/*
+
+venv:: pyproject.toml .check-env
+ifneq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make venv` in upper directories and KFPv2 is not set
+ echo "Skipping venv as KFPv2 is not defined"
+else
+ @# Help: Create the virtual environment using pyproject.toml
+ rm -rf venv
+ $(PYTHON) -m venv venv
+ . ${VENV_ACTIVATE}; \
+ cd ../../../data-processing-lib/python && make set-versions && cd -; \
+ pip install -e ../../../data-processing-lib/python; \
+ cd ../../../data-processing-lib/ray && make set-versions && cd -; \
+ pip install -e ../../../data-processing-lib/ray; \
+ cd ../python_apiserver_client && make set-versions && cd -; \
+ pip install -e ../python_apiserver_client; \
+ pip install -e .; \
+ pip install pytest pytest-cov
+endif
+
+test:: venv
+ifneq ($(KFPv2), 1)
+ # we want to prevent execution of the rule, when we run `make test` in upper directories and KFPv2 is not set
+ echo "Skipping test as KFPv2 is not defined"
+else
+ @# Help: Use the already-built virtual environment to run pytest on the test directory.
+ifeq ($(DEPLOY_KUBEFLOW),1)
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) kuberay_api_test.py;
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) ray_remote_jobs_test.py;
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) pipeline_utils_test.py;
+endif
+endif
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/README.md b/kfp/kfp_support_lib/kfp_v2_workflow_support/README.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
new file mode 100644
index 000000000..3e1607ee6
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
@@ -0,0 +1,49 @@
+[project]
+name = "data_prep_toolkit_kfp_v2"
+version = "0.2.0.dev6"
+requires-python = ">=3.10,<3.12"
+description = "Data Preparation Kit Library. KFP support"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
+ { name = "Alexey Roytman", email = "roytman@il.ibm.com" },
+ { name = "Mohammad Nassar", email = "Mohammad.Nassar@ibm.com" },
+ { name = "Revital Eres", email = "eres@il.ibm.com" },
+]
+dependencies = [
+ "kfp==2.7.0",
+ "kfp-kubernetes==1.2.0",
+ "ray==2.9.3",
+ "requests",
+ "data_prep_toolkit_ray==0.2.0.dev6",
+ "python_apiserver_client",
+]
+
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+
+[project.optional-dependencies]
+dev = [
+ "twine",
+ "pytest>=7.3.2",
+ "pytest-dotenv>=0.5.2",
+ "pytest-env>=1.0.0",
+ "pre-commit>=3.3.2",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.10.0",
+]
+
+[options]
+package_dir = ["src"]
+
+[options.packages.find]
+where = ["src/workflow_support"]
+
+[tool.pytest.ini_options]
+addopts = "--cov --cov-report term-missing --cov-fail-under 10"
+markers = ["unit: unit tests", "integration: integration tests"]
+
+[tool.coverage.run]
+include = ["src/*"]
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/__init__.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/__init__.py
new file mode 100644
index 000000000..6b99a6be1
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/__init__.py
@@ -0,0 +1,6 @@
+from workflow_support.compile_utils.component import (
+ ONE_HOUR_SEC,
+ ONE_DAY_SEC,
+ ONE_WEEK_SEC,
+ ComponentUtils
+)
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/component.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/component.py
new file mode 100644
index 000000000..4fa47290f
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/compile_utils/component.py
@@ -0,0 +1,58 @@
+import kfp.dsl as dsl
+from kfp import kubernetes
+from typing import Dict
+
+RUN_NAME = "KFP_RUN_NAME"
+
+ONE_HOUR_SEC = 60 * 60
+ONE_DAY_SEC = ONE_HOUR_SEC * 24
+ONE_WEEK_SEC = ONE_DAY_SEC * 7
+
+class ComponentUtils:
+ """
+ Class containing methods supporting building pipelines
+ """
+
+ @staticmethod
+ def add_settings_to_component(
+ task: dsl.PipelineTask,
+ timeout: int,
+ image_pull_policy: str = "IfNotPresent",
+ cache_strategy: bool = False,
+ ) -> None:
+ """
+ Add settings to kfp task
+ :param task: kfp task
+ :param timeout: timeout to set to the component in seconds
+ :param image_pull_policy: pull policy to set to the component
+ :param cache_strategy: cache strategy
+ """
+
+ kubernetes.use_field_path_as_env(task, env_name=RUN_NAME,
+ field_path="metadata.annotations['pipelines.kubeflow.org/run_name']")
+ # Set caching
+ task.set_caching_options(enable_caching=cache_strategy)
+ # image pull policy
+ kubernetes.set_image_pull_policy(task, image_pull_policy)
+ # Set the timeout for the task to one day (in seconds)
+ kubernetes.set_timeout(task, seconds=timeout)
+
+ @staticmethod
+ def set_s3_env_vars_to_component(
+ task: dsl.PipelineTask,
+ secret: str = '',
+ env2key: Dict[str, str] = {'s3-key': 'S3_KEY', 's3-secret': 'S3_SECRET', 's3-endpoint': 'ENDPOINT'},
+ prefix: str = None,
+ ) -> None:
+ """
+ Set S3 env variables to KFP component
+ :param task: kfp task
+ :param secret: secret name with the S3 credentials
+ :param env2key: dict with mapping each env variable to a key in the secret
+ :param prefix: prefix to add to env name
+ """
+
+ if prefix is not None:
+ # iterate over a copy of the keys, since the dict is mutated inside the loop
+ for env_name in list(env2key.keys()):
+ env2key[prefix + "_" + env_name] = env2key.pop(env_name)
+ kubernetes.use_secret_as_env(task=task, secret_name='s3-secret', secret_key_to_env=env2key)
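+
+# Example usage when building a pipeline (sketch; "my_transform_op" is a hypothetical component):
+#   task = my_transform_op(...)
+#   ComponentUtils.add_settings_to_component(task, timeout=ONE_HOUR_SEC)
+#   ComponentUtils.set_s3_env_vars_to_component(task, secret="s3-secret")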
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/__init__.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/__init__.py
new file mode 100644
index 000000000..0e80d97a2
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/__init__.py
@@ -0,0 +1 @@
+from workflow_support.pipeline_utils.pipeline_utils import PipelinesUtils
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/pipeline_utils.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/pipeline_utils.py
new file mode 100644
index 000000000..7566f6b2e
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/pipeline_utils.py
@@ -0,0 +1,173 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import datetime
+import time
+from typing import Any, Optional
+
+from data_processing.utils import get_logger
+from kfp_server_api import models
+
+from kfp import Client
+
+
+logger = get_logger(__name__)
+
+
+class PipelinesUtils:
+ """
+ Helper class for pipeline management
+ """
+
+ def __init__(self, host: str = "http://localhost:8080"):
+ """
+ Initialization
+ :param host: host to connect to
+ """
+ self.kfp_client = Client(host=host)
+
+ def upload_pipeline(
+ self,
+ pipeline_package_path: str = None,
+ pipeline_name: str = None,
+ overwrite: bool = False,
+ description: str = None,
+ ) -> models.api_pipeline.ApiPipeline:
+ """
+ Uploads the pipeline
+ :param pipeline_package_path: Local path to the pipeline package.
+ :param pipeline_name: Optional. Name of the pipeline to be shown in the UI
+ :param overwrite: Optional. If pipeline exists, delete it before creating a new one.
+ :param description: Optional. Description of the pipeline to be shown in the UI.
+ :return: Server response object containing pipeline id and other information.
+ """
+ if overwrite:
+ pipeline = self.get_pipeline_by_name(name=pipeline_name)
+ if pipeline is not None:
+ try:
+ logger.info(f"pipeline {pipeline_name} already exists. Trying to delete it.")
+ self.kfp_client.delete_pipeline(pipeline_id=pipeline.id)
+ except Exception as e:
+ logger.warning(f"Exception deleting pipeline {e} before uploading")
+ return None
+ try:
+ pipeline = self.kfp_client.upload_pipeline(
+ pipeline_package_path=pipeline_package_path, pipeline_name=pipeline_name, description=description
+ )
+ except Exception as e:
+ logger.warning(f"Exception uploading pipeline {e}")
+ return None
+ if pipeline is None:
+ logger.warning(f"Failed to upload pipeline {pipeline_name}.")
+ return None
+ logger.info("Pipeline uploaded")
+ return pipeline
+
+ def delete_pipeline(self, pipeline_id):
+ """
+ Delete pipeline.
+ :param pipeline_id: id of the pipeline.
+ :return
+ Returns:
+ Object. If the method is called asynchronously, returns the request thread.
+ Raises:
+ kfp_server_api.ApiException: If pipeline is not found.
+ """
+ return self.kfp_client.delete_pipeline(pipeline_id)
+
+ def start_pipeline(
+ self,
+ pipeline: models.api_pipeline.ApiPipeline,
+ experiment: models.api_experiment.ApiExperiment,
+ params: Optional[dict[str, Any]],
+ ) -> str:
+ """
+ Start a specified pipeline.
+ :param pipeline: pipeline definition
+ :param experiment: experiment to use
+ :param params: pipeline parameters
+ :return: the id of the run object
+ """
+ job_name = pipeline.name + " " + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+ try:
+ run_id = self.kfp_client.run_pipeline(
+ experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline.id, params=params
+ )
+ logger.info(f"Pipeline run {job_name} submitted")
+ return run_id.id
+ except Exception as e:
+ logger.warning(f"Exception starting pipeline {e}")
+ return None
+
+ def get_experiment_by_name(self, name: str = "Default") -> models.api_experiment.ApiExperiment:
+ """
+ Get experiment by name
+ :param name: name
+ :return: experiment
+ """
+ try:
+ return self.kfp_client.get_experiment(experiment_name=name)
+ except Exception as e:
+ logger.warning(f"Exception getting experiment {e}")
+ return None
+
+ def get_pipeline_by_name(self, name: str, np: int = 100) -> models.api_pipeline.ApiPipeline:
+ """
+ Given pipeline name, return the pipeline
+ :param name: pipeline name
+ :param np: page size for pipeline query. For large clusters with many pipelines, you might need to
+ increase this number
+ :return: pipeline
+ """
+ try:
+ # Get all pipelines
+ pipelines = self.kfp_client.list_pipelines(page_size=np).pipelines
+ required = list(filter(lambda p: name in p.name, pipelines))
+ if len(required) != 1:
+ logger.warning(f"Failure to get pipeline. Number of pipelines with name {name} is {len(required)}")
+ return None
+ return required[0]
+
+ except Exception as e:
+ logger.warning(f"Exception getting pipeline {e}")
+ return None
+
+ def wait_pipeline_completion(self, run_id: str, timeout: int = -1, wait: int = 600) -> tuple[str, str]:
+ """
+ Waits for a pipeline run to complete
+ :param run_id: run id
+ :param timeout: timeout (sec) (-1 wait forever)
+ :param wait: polling interval between status checks (sec)
+ :return: Completion status and an error message if such exists
+ """
+ try:
+ if timeout > 0:
+ end = time.time() + timeout
+ else:
+ end = 2**63 - 1
+ run_details = self.kfp_client.get_run(run_id=run_id)
+ status = run_details.run.status
+ while status is None or status.lower() not in ["succeeded", "completed", "failed", "skipped", "error"]:
+ time.sleep(wait)
+ if (end - time.time()) < 0:
+ return "failed", f"Execution is taking too long"
+ run_details = self.kfp_client.get_run(run_id=run_id)
+ status = run_details.run.status
+ logger.info(f"Got pipeline execution status {status}")
+
+ if status.lower() in ["succeeded", "completed"]:
+ return status, ""
+ return status, run_details.run.error
+
+ except Exception as e:
+ logger.warning(f"Failed waiting pipeline completion {e}")
+ return "failed", str(e)
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/pipelines_tests_utils.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/pipelines_tests_utils.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/pipelines_tests_utils.py
rename to kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/pipeline_utils/pipelines_tests_utils.py
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/__init__.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/__init__.py
new file mode 100644
index 000000000..8d2cdd648
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/__init__.py
@@ -0,0 +1,2 @@
+from workflow_support.runtime_utils.kfp_utils import KFPUtils
+from workflow_support.runtime_utils.remote_jobs_utils import RayRemoteJobs, execute_ray_jobs
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py
new file mode 100644
index 000000000..0e9951282
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/kfp_utils.py
@@ -0,0 +1,160 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import json
+import os
+import re
+import sys
+from typing import Any
+
+from data_processing.utils import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class KFPUtils:
+ """
+ Helper utilities for KFP implementations
+ """
+
+ @staticmethod
+ def credentials(
+ access_key: str = "S3_KEY", secret_key: str = "S3_SECRET", endpoint: str = "ENDPOINT"
+ ) -> tuple[str, str, str]:
+ """
+ Get credentials from the environment
+ :param access_key: environment variable for access key
+ :param secret_key: environment variable for secret key
+ :param endpoint: environment variable for S3 endpoint
+ :return: tuple of access key, secret key and S3 endpoint (None for any value that is not set)
+ """
+ s3_key = os.getenv(access_key, None)
+ s3_secret = os.getenv(secret_key, None)
+ s3_endpoint = os.getenv(endpoint, None)
+ if s3_key is None or s3_secret is None or s3_endpoint is None:
+ logger.warning("Failed to load s3 credentials")
+ return s3_key, s3_secret, s3_endpoint
+
+ @staticmethod
+ def get_namespace() -> str:
+ """
+ Get the k8s namespace that we are running in
+ :return: namespace name, or an empty string if it cannot be determined
+ """
+ ns = ""
+ try:
+ file = open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r")
+ except Exception as e:
+ logger.warning(
+ f"Failed to open /var/run/secrets/kubernetes.io/serviceaccount/namespace file, " f"exception {e}"
+ )
+ else:
+ with file:
+ ns = file.read()
+ return ns
+
+ @staticmethod
+ def runtime_name(ray_name: str = "", run_id: str = "") -> str:
+ """
+ Get unique runtime name
+ :param ray_name:
+ :param run_id:
+ :return: runtime name
+ """
+ # K8s object names cannot contain special characters such as '_' and must be all lower case.
+ if ray_name != "":
+ ray_name = ray_name.replace("_", "-").lower()
+ pattern = r"[^a-zA-Z0-9-]" # the ray_name cannot contain upper case here, but leave it just in case.
+ ray_name = re.sub(pattern, "", ray_name)
+ else:
+ ray_name = "a"
+ # The return value plus the namespace name will be the name of the Ray Route,
+ # whose length is restricted to 64 characters,
+ # therefore we restrict the returned name to 15 characters.
+ if run_id != "":
+ return f"{ray_name[:9]}-{run_id[:5]}"
+ return ray_name[:15]
+
+ @staticmethod
+ def dict_to_req(d: dict[str, Any], executor: str = "transformer_launcher.py") -> str:
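+ # Illustrative example (hypothetical inputs): dict_to_req({"data_max_files": 10, "runtime_pipeline_id": "pid"})
+ # returns a string similar to:
+ # python transformer_launcher.py --data_max_files=10 --runtime_pipeline_id="pid"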
+ res = f"python {executor} "
+ for key, value in d.items():
+ if str(value) != "":
+ if isinstance(value, str):
+ if '"' in value:
+ logger.warning(f"can't parse inputs with double quotation marks, please use single quotation marks instead")
+ res += f'--{key}="{value}" '
+ elif isinstance(value, bool):
+ if value:
+ res += f"--{key} "
+ else:
+ res += f"--{key}={value} "
+
+ logger.info(f"request to execute: {res}")
+ return res
+
+ # Load a string that represents a json to python dictionary
+ @staticmethod
+ def load_from_json(js: str) -> dict[str, Any]:
+ try:
+ return json.loads(js)
+ except Exception as e:
+ logger.warning(f"Failed to load parameters {js} with error {e}")
+ sys.exit(1)
+
+ @staticmethod
+ def default_compute_execution_params(
+ worker_options: str, # ray worker configuration
+ actor_options: str, # cpus per actor
+ ) -> str:
+ """
+ This is the simplest possible computation of transform execution parameters
+ :param worker_options: configuration of ray workers
+ :param actor_options: actor request requirements
+ :return: number of actors
+ """
+ import sys
+
+ from data_processing.utils import GB, get_logger
+ from workflow_support.runtime_utils import KFPUtils
+
+ logger = get_logger(__name__)
+
+ # convert input
+ w_options = KFPUtils.load_from_json(worker_options.replace("'", '"'))
+ a_options = KFPUtils.load_from_json(actor_options.replace("'", '"'))
+ # Compute available cluster resources
+ cluster_cpu = w_options["replicas"] * w_options["cpu"]
+ cluster_mem = w_options["replicas"] * w_options["memory"]
+ cluster_gpu = w_options["replicas"] * w_options.get("gpu", 0.0)
+ logger.info(f"Cluster available CPUs {cluster_cpu}, Memory {cluster_mem}, GPUs {cluster_gpu}")
+ # compute number of actors
+ n_actors_cpu = int(cluster_cpu * 0.85 / a_options.get("num_cpus", 0.5))
+ n_actors_memory = int(cluster_mem * 0.85 / (a_options.get("memory", GB) / GB))
+ n_actors = min(n_actors_cpu, n_actors_memory)
+ # Check if we need gpu calculations as well
+ actor_gpu = a_options.get("num_gpus", 0)
+ if actor_gpu > 0:
+ n_actors_gpu = int(cluster_gpu / actor_gpu)
+ n_actors = min(n_actors, n_actors_gpu)
+ logger.info(f"Number of actors - {n_actors}")
+ if n_actors < 1:
+ logger.warning(
+ f"Not enough cpu/gpu/memory to run transform, "
+ f"required cpu {a_options.get('num_cpus', .5)}, available {cluster_cpu}, "
+ f"required memory {a_options.get('memory', 1)}, available {cluster_mem}, "
+ f"required cpu {actor_gpu}, available {cluster_gpu}"
+ )
+ sys.exit(1)
+
+ return str(n_actors)
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py
new file mode 100644
index 000000000..0b20b28c4
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/src/workflow_support/runtime_utils/remote_jobs_utils.py
@@ -0,0 +1,527 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+import re
+import sys
+import time
+from typing import Any
+
+from data_processing.data_access import DataAccess, DataAccessFactory
+from data_processing.utils import ParamsUtils, get_logger
+from python_apiserver_client import KubeRayAPIs
+from python_apiserver_client.params import (
+ DEFAULT_HEAD_START_PARAMS,
+ DEFAULT_WORKER_START_PARAMS,
+ Cluster,
+ ClusterSpec,
+ HeadNodeSpec,
+ RayJobRequest,
+ Template,
+ WorkerNodeSpec,
+ environment_variables_decoder,
+ volume_decoder,
+)
+from workflow_support.runtime_utils import KFPUtils
+from ray.job_submission import JobStatus
+
+
+logger = get_logger(__name__)
+
+
+class RayRemoteJobs:
+ """
+ class supporting Ray remote jobs
+ """
+
+ ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
+
+ def __init__(
+ self,
+ server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
+ default_image: str = "rayproject/ray:2.9.3-py310",
+ http_retries: int = 5,
+ wait_interval: int = 2,
+ ):
+ """
+ Initialization
+ :param server_url: API server URL. Default value is assuming running inside the cluster
+ :param default_image: default Ray image
+ :param http_retries: number of HTTP retries
+ :param wait_interval: wait interval between retries (sec)
+ """
+ self.api_server_client = KubeRayAPIs(
+ server_url=server_url, http_retries=http_retries, wait_interval=wait_interval
+ )
+ self.default_image = default_image
+
+ def create_ray_cluster(
+ self,
+ name: str,
+ namespace: str,
+ head_node: dict[str, Any],
+ worker_nodes: list[dict[str, Any]],
+ wait_cluster_ready: int = -1,
+ ) -> tuple[int, str]:
+ """
+ Create Ray cluster
+ :param name: cluster name; underscores are not allowed in the name
+ :param namespace: namespace
+ :param head_node: head node specification dictionary including the following:
+ mandatory fields:
+ cpu - number of cpus
+ memory - memory size (GB)
+ image - image to use
+ optional fields:
+ gpu - number of gpus
+ gpu_accelerator - gpu accelerator to use
+ image_pull_secret - image pull secret
+ ray_start_params - dictionary of ray start parameters
+ volumes - list of volumes for head node
+ service_account - service account to use (has to be created)
+ environment - dictionary of head node environment
+ annotations: dictionary of head node annotation
+ labels: dictionary of head node labels
+ image_pull_policy: image pull policy, default IfNotPresent
+
+ :param worker_nodes: an array of worker node specification dictionary including the following:
+ mandatory fields:
+ cpu - number of cpus
+ memory - memory size (GB)
+ image - image to use
+ max_replicas - max replicas for this worker group
+ optional fields:
+ gpu - number of gpus
+ gpu_accelerator - gpu accelerator to use
+ replicas - number of replicas to create for this group (default 1)
+ min_replicas - min number of replicas for this group (default 0)
+ image_pull_secret - image pull secret
+ ray_start_params - dictionary of ray start parameters
+ volumes - list of volumes for this group
+ service_account - service account to use (has to be created)
+ environment - dictionary of node of this group environment
+ annotations: dictionary of node of this group annotation
+ labels: dictionary of node of this group labels
+ image_pull_policy: image pull policy, default IfNotPresent
+
+ :param wait_cluster_ready: time (sec) to wait for the cluster to become ready (-1 to wait forever)
+ :return:tuple containing
+ http return code
+ message - only returned if http return code is not equal to 200
+ """
+ # start with templates
+ # head_node
+ cpus = head_node.get("cpu", 1)
+ memory = head_node.get("memory", 1)
+ gpus = head_node.get("gpu", 0)
+ accelerator = head_node.get("gpu_accelerator", None)
+ head_node_template_name = f"{name}-head-template"
+ _, _ = self.api_server_client.delete_compute_template(ns=namespace, name=head_node_template_name)
+ head_template = Template(
+ name=head_node_template_name,
+ namespace=namespace,
+ cpu=cpus,
+ memory=memory,
+ gpu=gpus,
+ gpu_accelerator=accelerator,
+ )
+ status, error = self.api_server_client.create_compute_template(head_template)
+ if status != 200:
+ return status, error
+ worker_template_names = [""] * len(worker_nodes)
+ index = 0
+ # For every worker group
+ for worker_node in worker_nodes:
+ cpus = worker_node.get("cpu", 1)
+ memory = worker_node.get("memory", 1)
+ gpus = worker_node.get("gpu", 0)
+ accelerator = worker_node.get("gpu_accelerator", None)
+ worker_node_template_name = f"{name}-worker-template-{index}"
+ _, _ = self.api_server_client.delete_compute_template(ns=namespace, name=worker_node_template_name)
+ worker_template = Template(
+ name=worker_node_template_name,
+ namespace=namespace,
+ cpu=cpus,
+ memory=memory,
+ gpu=gpus,
+ gpu_accelerator=accelerator,
+ )
+ status, error = self.api_server_client.create_compute_template(worker_template)
+ if status != 200:
+ return status, error
+ worker_template_names[index] = worker_node_template_name
+ index += 1
+ # Build head node spec
+ image = head_node.get("image", self.default_image)
+ image_pull_secret = head_node.get("image_pull_secret", None)
+ image_pull_policy = head_node.get("image_pull_policy", None)
+ ray_start_params = head_node.get("ray_start_params", DEFAULT_HEAD_START_PARAMS)
+ volumes_dict = head_node.get("volumes", None)
+ service_account = head_node.get("service_account", None)
+ environment_dict = head_node.get("environment", None)
+ annotations = head_node.get("annotations", None)
+ labels = head_node.get("labels", None)
+ if volumes_dict is None:
+ volumes = None
+ else:
+ volumes = [volume_decoder(v) for v in volumes_dict]
+ if environment_dict is None:
+ environment = None
+ else:
+ environment = environment_variables_decoder(environment_dict)
+ head_node_spec = HeadNodeSpec(
+ compute_template=head_node_template_name,
+ image=image,
+ ray_start_params=ray_start_params,
+ volumes=volumes,
+ service_account=service_account,
+ image_pull_secret=image_pull_secret,
+ environment=environment,
+ annotations=annotations,
+ labels=labels,
+ image_pull_policy=image_pull_policy,
+ )
+ # build worker nodes
+ worker_groups = []
+ index = 0
+ for worker_node in worker_nodes:
+ max_replicas = worker_node.get("max_replicas", 1)
+ replicas = worker_node.get("replicas", 1)
+ min_replicas = worker_node.get("min_replicas", 0)
+ image = worker_node.get("image", self.default_image)
+ image_pull_secret = worker_node.get("image_pull_secret", None)
+ image_pull_policy = head_node.get("image_pull_policy", None)
+ ray_start_params = worker_node.get("ray_start_params", DEFAULT_WORKER_START_PARAMS)
+ volumes_dict = worker_node.get("volumes", None)
+ service_account = worker_node.get("service_account", None)
+ environment_dict = worker_node.get("environment", None)
+ annotations = worker_node.get("annotations", None)
+ labels = worker_node.get("labels", None)
+ if volumes_dict is None:
+ volumes = None
+ else:
+ volumes = [volume_decoder(v) for v in volumes_dict]
+ if environment_dict is None:
+ environment = None
+ else:
+ environment = environment_variables_decoder(environment_dict)
+ worker_groups.append(
+ WorkerNodeSpec(
+ group_name=f"worker-group-{index}",
+ compute_template=worker_template_names[index],
+ image=image,
+ max_replicas=max_replicas,
+ replicas=replicas,
+ min_replicas=min_replicas,
+ ray_start_params=ray_start_params,
+ volumes=volumes,
+ service_account=service_account,
+ image_pull_secret=image_pull_secret,
+ environment=environment,
+ annotations=annotations,
+ labels=labels,
+ image_pull_policy=image_pull_policy,
+ )
+ )
+ index += 1
+ # Build cluster spec
+ cluster_spec = ClusterSpec(head_node=head_node_spec, worker_groups=worker_groups)
+ # Build cluster
+ cluster = Cluster(name=name, namespace=namespace, user="dataprep", version="2.9.3", cluster_spec=cluster_spec)
+ status, error = self.api_server_client.create_cluster(cluster)
+ if status != 200:
+ return status, error
+ # Wait for cluster ready
+ return self.api_server_client.wait_cluster_ready(name=name, ns=namespace, wait=wait_cluster_ready)
+
+ def delete_ray_cluster(self, name: str, namespace: str) -> tuple[int, str]:
+ """
+ Clean up Ray cluster and supporting template
+ :param name: cluster name
+ :param namespace: cluster namespace
+ :return:tuple containing
+ http return code
+ message - only returned if http return code is not equal to 200
+ """
+ # delete cluster
+ status, error = self.api_server_client.delete_cluster(ns=namespace, name=name)
+ if status != 200:
+ return status, error
+ # clean up templates
+ status, error, template_array = self.api_server_client.list_compute_templates_namespace(ns=namespace)
+ if status != 200:
+ return status, error
+ for template in template_array:
+ if template.name.startswith(name):
+ status, error = self.api_server_client.delete_compute_template(ns=namespace, name=template.name)
+ if status != 200:
+ return status, error
+ return status, error
+
+ def submit_job(
+ self,
+ name: str,
+ namespace: str,
+ request: dict[str, Any],
+ runtime_env: str = None,
+ executor: str = "transformer_launcher.py",
+ ) -> tuple[int, str, str]:
+ """
+ Submit job for execution
+ :param name: cluster name
+ :param namespace: cluster namespace
+ :param request: dictionary of the remote job request
+ :param runtime_env: runtime environment string
+ :param executor: python file to execute
+ :return:tuple containing
+ http return code
+ message - only returned if http return code is not equal to 200
+ submission id - submission id
+ """
+ # Although the cluster is ready, the service web server might not be ready yet at this point.
+ # To ensure that it is ready, we try to get job info from the cluster. Even if this fails
+ # a couple of times, it is harmless.
+ _, _, _ = self.api_server_client.list_job_info(ns=namespace, name=name)
+ time.sleep(5)
+ # Build job request
+ job_request = RayJobRequest(entrypoint=KFPUtils.dict_to_req(d=request, executor=executor))
+ if runtime_env is not None:
+ job_request.runtime_env = runtime_env
+ return self.api_server_client.submit_job(ns=namespace, name=name, job_request=job_request)
+
+ def _get_job_status(self, name: str, namespace: str, submission_id: str) -> tuple[int, str, str]:
+ """
+ Get job status
+ :param name: cluster name
+ :param namespace: cluster namespace
+ :param submission_id: job submission ID
+ :return:tuple containing
+ http return code
+ message - only returned if http return code is not equal to 200
+ status - job status
+ """
+ # get job info
+ status, error, info = self.api_server_client.get_job_info(ns=namespace, name=name, sid=submission_id)
+ if status // 100 != 2:
+ return status, error, ""
+ return status, error, info.status
+
+ @staticmethod
+ def _print_log(log: str, previous_log_len: int) -> None:
+ """
+ Prints the delta between current and previous logs
+ :param log: current log
+ :param previous_log_len: previous log length
+ :return: None
+ """
+ l_to_print = log[previous_log_len:]
+ if len(l_to_print) > 0:
+ l_to_print = RayRemoteJobs.ansi_escape.sub("", l_to_print)
+ print(l_to_print)
+
+ def follow_execution(
+ self,
+ name: str,
+ namespace: str,
+ submission_id: str,
+ data_access: DataAccess = None,
+ job_ready_timeout: int = 600,
+ print_timeout: int = 120,
+ ) -> None:
+ """
+ Follow remote job execution
+ :param name: cluster name
+ :param namespace: cluster namespace
+ :param submission_id: job submission ID
+ :param data_access: data access class
+ :param job_ready_timeout: timeout (sec) to wait for the job to become ready
+ :param print_timeout: print interval
+ :return: None
+ """
+ # Wait for job to start running
+ job_status = JobStatus.PENDING
+ while job_status != JobStatus.RUNNING and job_ready_timeout > 0:
+ status, error, job_status = self._get_job_status(
+ name=name, namespace=namespace, submission_id=submission_id
+ )
+ if status // 100 != 2:
+ sys.exit(1)
+ if job_status in {JobStatus.STOPPED, JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.RUNNING}:
+ break
+ time.sleep(self.api_server_client.wait_interval)
+ job_ready_timeout -= self.api_server_client.wait_interval
+ logger.info(f"job status is {job_status}")
+ if job_ready_timeout <= 0:
+ logger.warning("timed out waiting for job become ready, exiting")
+ sys.exit(1)
+ # While the job is running, print the log
+ previous_log_len = 0
+ # At this point the job could have succeeded, failed, been stopped, or still be running, so print the log regardless
+ status, error, log = self.api_server_client.get_job_log(ns=namespace, name=name, sid=submission_id)
+ if status // 100 != 2:
+ sys.exit(1)
+ self._print_log(log=log, previous_log_len=previous_log_len)
+ previous_log_len = len(log)
+ # continue printing log, while job is running
+ while job_status == JobStatus.RUNNING:
+ time.sleep(print_timeout)
+ status, error, log = self.api_server_client.get_job_log(ns=namespace, name=name, sid=submission_id)
+ if status // 100 != 2:
+ sys.exit(1)
+ self._print_log(log=log, previous_log_len=previous_log_len)
+ previous_log_len = len(log)
+ status, error, job_status = self._get_job_status(
+ name=name, namespace=namespace, submission_id=submission_id
+ )
+ if status // 100 != 2:
+ sys.exit(1)
+ # Print the final log and execution status
+ # Sleep here to avoid race conditions
+ time.sleep(2)
+ status, error, log = self.api_server_client.get_job_log(ns=namespace, name=name, sid=submission_id)
+ if status // 100 != 2:
+ sys.exit(1)
+ self._print_log(log=log, previous_log_len=previous_log_len)
+ logger.info(f"Job completed with execution status {job_status}")
+ if job_status != JobStatus.SUCCEEDED:
+ sys.exit(1)
+ if data_access is None:
+ return
+ # Here data access is either S3 or lakehouse, both of which define an output folder
+ try:
+ output_folder = data_access.get_output_folder()
+ except Exception as e:
+ logger.warning(f"failed to get output folder {e}")
+ return
+ output_folder = output_folder if output_folder.endswith("/") else output_folder + "/"
+ execution_log_path = f"{output_folder}execution.log"
+ logger.info(f"saving execution log to {execution_log_path}")
+ data_access.save_file(path=execution_log_path, data=bytes(log, "UTF-8"))
+
+
+def _execute_remote_job(
+ name: str,
+ ns: str,
+ script: str,
+ params: dict[str, Any],
+ data_access_params: dict[str, Any],
+ additional_params: dict[str, Any],
+ remote_jobs: RayRemoteJobs,
+) -> None:
+ """
+ Execute remote job on Ray cluster
+ :param name: cluster name
+ :param ns: execution/cluster namespace
+ :param additional_params: additional parameters for the job
+ :param data_access_params: data access parameters
+ :param params: job execution parameters (specific for a specific transform,
+ generated by the transform workflow)
+ :param script: script to run (has to be present in the image)
+ :param remote_jobs: remote jobs execution support class
+ :return:
+ """
+
+ status, error, submission = remote_jobs.submit_job(name=name, namespace=ns, request=params, executor=script)
+ if status != 200:
+ logger.error(f"Failed to submit job - status: {status}, error: {error}")
+ exit(1)
+
+ logger.info(f"submitted job successfully, submission id {submission}")
+ # create data access
+ data_factory = DataAccessFactory()
+ data_factory.apply_input_params(args=data_access_params)
+ data_access = data_factory.create_data_access()
+ # print execution log
+ remote_jobs.follow_execution(
+ name=name,
+ namespace=ns,
+ submission_id=submission,
+ data_access=data_access,
+ print_timeout=additional_params.get("wait_print_tmout", 120),
+ job_ready_timeout=additional_params.get("wait_job_ready_tmout", 600),
+ )
+
+
+def execute_ray_jobs(
+ name: str, # name of Ray cluster
+ additional_params: dict[str, Any],
+ e_params: dict[str, Any],
+ exec_script_name: str,
+ server_url: str,
+) -> None:
+ """
+ Execute Ray jobs on a cluster, periodically printing the execution log. Completes when all Ray jobs complete.
+ All of the jobs will be executed, although some of them may fail.
+ :param name: cluster name
+ :param additional_params: additional parameters for the job
+ :param e_params: job execution parameters (specific for a specific transform,
+ generated by the transform workflow)
+ :param exec_script_name: script to run (has to be present in the image)
+ :param server_url: API server url
+ :return: None
+ """
+ # prepare for execution
+ ns = KFPUtils.get_namespace()
+ if ns == "":
+ logger.warning(f"Failed to get namespace")
+ sys.exit(1)
+ # create remote jobs class
+ remote_jobs = RayRemoteJobs(
+ server_url=server_url,
+ http_retries=additional_params.get("http_retries", 5),
+ wait_interval=additional_params.get("wait_interval", 2),
+ )
+ # find config parameter
+ config = ParamsUtils.get_config_parameter(e_params)
+ if config is None:
+ exit(1)
+ # get config value
+ config_value = KFPUtils.load_from_json(e_params[config].replace("'", '"'))
+ s3_creds = KFPUtils.load_from_json(e_params["data_s3_cred"].replace("'", '"'))
+ if type(config_value) is not list:
+ # single request
+ return _execute_remote_job(
+ name=name,
+ ns=ns,
+ script=exec_script_name,
+ data_access_params={config: config_value, "data_s3_cred": s3_creds},
+ params=e_params,
+ additional_params=additional_params,
+ remote_jobs=remote_jobs,
+ )
+ # remove config key from the dictionary
+ launch_params = dict(e_params)
+ del launch_params[config]
+ # Loop through all configuration
+ n_launches = 0
+ for conf in config_value:
+ # populate individual config and launch
+ launch_params[config] = ParamsUtils.convert_to_ast(d=conf)
+ try:
+ _execute_remote_job(
+ name=name,
+ ns=ns,
+ script=exec_script_name,
+ data_access_params={config: conf, "data_s3_cred": s3_creds},
+ params=launch_params,
+ additional_params=additional_params,
+ remote_jobs=remote_jobs,
+ )
+ n_launches += 1
+ except SystemExit:
+ logger.warning(f"Failed to execute job for configuration {conf}")
+ continue
+
+ if n_launches == 0:
+ logger.warning("All executions failed")
+ sys.exit(1)
+ else:
+ logger.info(f"{n_launches} ot of {len(config_value)} succeeded")
diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/test/pipeline_utils_test.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/test/pipeline_utils_test.py
new file mode 100644
index 000000000..200bf1676
--- /dev/null
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/test/pipeline_utils_test.py
@@ -0,0 +1,34 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from workflow_support.pipeline_utils import PipelinesUtils
+
+server_url = "http://localhost:8080/"
+
+def test_pipelines():
+ """
+ Test pipelines utils
+ """
+ utils = PipelinesUtils(host=server_url)
+ # get pipeline by name
+ pipeline = utils.get_pipeline_by_name("[Tutorial] Data passing in python components")
+ assert pipeline is not None
+ # get default experiment
+ experiment = utils.get_experiment_by_name()
+ assert experiment is not None
+ # start pipeline
+ run = utils.start_pipeline(pipeline=pipeline, experiment=experiment, params={})
+ assert run is not None
+ # wait for completion
+ status, error = utils.wait_pipeline_completion(run_id=run, wait=10)
+ assert status.lower() == "succeeded"
+ assert error == ""
diff --git a/kfp/kfp_support_lib/test/ray_remote_jobs_test.py b/kfp/kfp_support_lib/kfp_v2_workflow_support/test/ray_remote_jobs_test.py
similarity index 93%
rename from kfp/kfp_support_lib/test/ray_remote_jobs_test.py
rename to kfp/kfp_support_lib/kfp_v2_workflow_support/test/ray_remote_jobs_test.py
index 5ae76a5f5..f409550e9 100644
--- a/kfp/kfp_support_lib/test/ray_remote_jobs_test.py
+++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/test/ray_remote_jobs_test.py
@@ -11,9 +11,10 @@
################################################################################
from configmaps import ConfigmapsManager
-from kfp_support.api_server_client.params import ConfigMapVolume
-from kfp_support.workflow_support.utils import RayRemoteJobs
+from python_apiserver_client.params import ConfigMapVolume
+from workflow_support.runtime_utils import RayRemoteJobs
+server_url = "http:localhost:8080/ray/"
def test_ray_remote_jobs():
"""
@@ -56,7 +57,7 @@ def test_ray_remote_jobs():
cm_manager.create_code_map()
# create cluster
- remote_jobs = RayRemoteJobs(server_url="http://localhost:8080/ray")
+ remote_jobs = RayRemoteJobs(server_url=server_url)
status, error = remote_jobs.create_ray_cluster(
name="job-test", namespace="default", head_node=head_node, worker_nodes=[worker_node]
)
diff --git a/kfp/kfp_support_lib/python_apiserver_client/.gitignore b/kfp/kfp_support_lib/python_apiserver_client/.gitignore
new file mode 100644
index 000000000..3ff12a7a8
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/.gitignore
@@ -0,0 +1,32 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+htmlcov
+.coverage
+.cache
+nosetests.xml
+coverage.xml
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/python_apiserver_client/Makefile b/kfp/kfp_support_lib/python_apiserver_client/Makefile
new file mode 100644
index 000000000..642d475d1
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/Makefile
@@ -0,0 +1,62 @@
+# Define the root of the local git clone for the common rules to be able
+# know where they are running from.
+REPOROOT=../../..
+include ${REPOROOT}/.make.versions
+include ${REPOROOT}/kfp/requirements.env
+
+# Include the common rules.
+# Use "make help" to see them.
+include ../../../.make.defaults
+
+# Command to run pytest
+PYTHON_VERSION=$(shell $(PYTHON) --version)
+VENV_ACTIVATE=venv/bin/activate
+
+DEPLOY_KUBEFLOW ?= 0
+
+clean::
+ @# Help: Clean up the distribution build and the venv
+ rm -r dist venv || true
+ rm -rf src/*egg-info || true
+ rm -rf *.back || true
+
+
+.check-env:: .check_python_version
+ @echo "Checks passed"
+
+set-versions:: .check-env
+ @# Help: Copy the Makefile distribution version into the pyproject.toml
+ sed -i.back 's/data-prep-toolkit==[0-9].*/data-prep-toolkit==${DPK_LIB_VERSION}",/' pyproject.toml
+
+build:: set-versions venv
+ @# Help: Build the distribution for publishing to a pypi
+ rm -r dist || true
+ rm -rf src/*egg-info || true
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m build
+
+publish:: .check-env
+ @# Help: Publish the wheel to testpypi
+ if [ -d "dist"]; then rm -r dist; fi
+ ${PYTHON} -m pip install --upgrade build
+ ${PYTHON} -m twine check dist/*
+ ${PYTHON} -m twine upload --verbose --non-interactive dist/*
+
+venv::pyproject.toml .check-env
+ @# Help: Create the virtual environment using pyproject.toml
+ rm -rf venv
+ $(PYTHON) -m venv venv
+ . ${VENV_ACTIVATE}; \
+ pip install --upgrade pip; \
+ cd ../../../data-processing-lib/python && make set-versions && cd -; \
+ pip install -e ../../../data-processing-lib/python; \
+ pip install -e .; \
+ pip install pytest pytest-cov
+
+test:: venv
+ @# Help: Use the already-built virtual environment to run pytest on the test directory.
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) api_params_test.py;
+ifeq ($(DEPLOY_KUBEFLOW),1)
+ . ${VENV_ACTIVATE}; export PYTHONPATH=../src; cd test; $(PYTEST) kuberay_api_test.py;
+endif
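+# Example invocation (illustrative; assumes a cluster with Kubeflow deployed):
+#   make DEPLOY_KUBEFLOW=1 test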
+
diff --git a/kfp/kfp_support_lib/python_apiserver_client/README.md b/kfp/kfp_support_lib/python_apiserver_client/README.md
new file mode 100644
index 000000000..de489adcd
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/README.md
@@ -0,0 +1,4 @@
+# KubeRay API server APIs
+
+This is a copy of the [KubeRay API server client python APIs](https://github.com/ray-project/kuberay/tree/master/clients/python-apiserver-client).
+Because these APIs are not published to PyPI, we include a copy of them in this project.
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/python_apiserver_client/pyproject.toml b/kfp/kfp_support_lib/python_apiserver_client/pyproject.toml
new file mode 100644
index 000000000..a933f1bbc
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/pyproject.toml
@@ -0,0 +1,28 @@
+[build-system]
+requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
+build-backend = "setuptools.build_meta"
+[options]
+package_dir = ["src"]
+[project]
+name = "python_apiserver_client"
+version = "0.1.0"
+dependencies = [
+ "requests",
+ "kubernetes",
+ "data-prep-toolkit==0.2.0.dev6",
+]
+authors = [
+ { name="KubeRay project"},
+]
+description = "A Kuberay python client library to manage clusters based on the KubeRay API server"
+readme = {file = "README.md", content-type = "text/markdown"}
+license = {text = "Apache-2.0"}
+requires-python = ">=3.10"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: Apache License 2.0",
+ "Operating System :: OS Independent",
+]
+
+[project.urls]
+"Homepage" = "https://github.com/ray-project/kuberay"
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/__init__.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/__init__.py
new file mode 100644
index 000000000..e6cdbec9a
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/__init__.py
@@ -0,0 +1 @@
+from python_apiserver_client.kuberay_apis import KubeRayAPIs
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/kuberay_apis.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/kuberay_apis.py
similarity index 99%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/kuberay_apis.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/kuberay_apis.py
index 270815e77..9051e7c73 100644
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/kuberay_apis.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/kuberay_apis.py
@@ -14,7 +14,7 @@
import requests
from data_processing.utils import get_logger
-from kfp_support.api_server_client.params import (
+from python_apiserver_client.params import (
Cluster,
RayJobInfo,
RayJobRequest,
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/__init__.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/__init__.py
similarity index 65%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/__init__.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/__init__.py
index e5a7d70fa..207f961a9 100644
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/__init__.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/__init__.py
@@ -1,4 +1,4 @@
-from kfp_support.api_server_client.params.templates import (
+from python_apiserver_client.params.templates import (
TolerationOperation,
TolerationEffect,
Toleration,
@@ -7,7 +7,7 @@
template_decoder,
templates_decoder,
)
-from kfp_support.api_server_client.params.volumes import (
+from python_apiserver_client.params.volumes import (
HostPath,
MountPropagationMode,
AccessMode,
@@ -20,25 +20,25 @@
SecretVolume,
volume_decoder,
)
-from kfp_support.api_server_client.params.environmentvariables import (
+from python_apiserver_client.params.environmentvariables import (
EnvVarSource,
EnvVarFrom,
EnvironmentVariables,
env_var_from_decoder,
environment_variables_decoder,
)
-from kfp_support.api_server_client.params.headnode import (
+from python_apiserver_client.params.headnode import (
ServiceType,
HeadNodeSpec,
DEFAULT_HEAD_START_PARAMS,
head_node_spec_decoder,
)
-from kfp_support.api_server_client.params.workernode import (
+from python_apiserver_client.params.workernode import (
WorkerNodeSpec,
DEFAULT_WORKER_START_PARAMS,
worker_node_spec_decoder,
)
-from kfp_support.api_server_client.params.cluster import (
+from python_apiserver_client.params.cluster import (
Environment,
AutoscalerOptions,
ClusterSpec,
@@ -50,4 +50,4 @@
cluster_decoder,
clusters_decoder,
)
-from kfp_support.api_server_client.params.jobsubmission import RayJobRequest, RayJobInfo
+from python_apiserver_client.params.jobsubmission import RayJobRequest, RayJobInfo
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/cluster.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/cluster.py
similarity index 99%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/cluster.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/cluster.py
index 922a14bef..5e1ee4867 100644
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/cluster.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/cluster.py
@@ -13,7 +13,7 @@
import enum
from typing import Any
-from kfp_support.api_server_client.params import (
+from python_apiserver_client.params import (
BaseVolume,
EnvironmentVariables,
HeadNodeSpec,
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/environmentvariables.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/environmentvariables.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/environmentvariables.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/environmentvariables.py
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/headnode.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/headnode.py
similarity index 99%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/headnode.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/headnode.py
index 7a9d4120f..37c2e2572 100644
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/headnode.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/headnode.py
@@ -13,7 +13,7 @@
import enum
from typing import Any
-from kfp_support.api_server_client.params import (
+from python_apiserver_client.params import (
BaseVolume,
EnvironmentVariables,
environment_variables_decoder,
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/jobsubmission.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/jobsubmission.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/jobsubmission.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/jobsubmission.py
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/templates.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/templates.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/templates.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/templates.py
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/volumes.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/volumes.py
similarity index 100%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/volumes.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/volumes.py
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/workernode.py b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/workernode.py
similarity index 99%
rename from kfp/kfp_support_lib/src/kfp_support/api_server_client/params/workernode.py
rename to kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/workernode.py
index ddcf193cc..3a9f8e439 100644
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/params/workernode.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/src/python_apiserver_client/params/workernode.py
@@ -12,7 +12,7 @@
from typing import Any
-from kfp_support.api_server_client.params import (
+from python_apiserver_client.params import (
BaseVolume,
EnvironmentVariables,
environment_variables_decoder,
diff --git a/kfp/kfp_support_lib/test/api_params_test.py b/kfp/kfp_support_lib/python_apiserver_client/test/api_params_test.py
similarity index 99%
rename from kfp/kfp_support_lib/test/api_params_test.py
rename to kfp/kfp_support_lib/python_apiserver_client/test/api_params_test.py
index 804c84aad..53740c939 100644
--- a/kfp/kfp_support_lib/test/api_params_test.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/test/api_params_test.py
@@ -12,7 +12,7 @@
import json
-from kfp_support.api_server_client.params import (
+from python_apiserver_client.params import (
DEFAULT_HEAD_START_PARAMS,
DEFAULT_WORKER_START_PARAMS,
AccessMode,
diff --git a/kfp/kfp_support_lib/python_apiserver_client/test/configmaps.py b/kfp/kfp_support_lib/python_apiserver_client/test/configmaps.py
new file mode 100644
index 000000000..65e53e828
--- /dev/null
+++ b/kfp/kfp_support_lib/python_apiserver_client/test/configmaps.py
@@ -0,0 +1,72 @@
+# (C) Copyright IBM Corp. 2024.
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from kubernetes import client, config
+
+
+CMAP_VALUE = """
+import ray
+import os
+import requests
+
+ray.init()
+
+@ray.remote
+class Counter:
+ def __init__(self):
+ # Used to verify runtimeEnv
+ self.name = os.getenv("counter_name")
+ assert self.name == "test_counter"
+ self.counter = 0
+
+ def inc(self):
+ self.counter += 1
+
+ def get_counter(self):
+ return "{} got {}".format(self.name, self.counter)
+
+counter = Counter.remote()
+
+for _ in range(5):
+ ray.get(counter.inc.remote())
+ print(ray.get(counter.get_counter.remote()))
+
+# Verify that the correct runtime env was used for the job.
+assert requests.__version__ == "2.26.0"
+"""
+CMAP_NAME = "ray-job-code-sample"
+
+
+class ConfigmapsManager:
+ """
+ Simple support class to manage config maps. Assumes local access to Kubectl
+ """
+
+ def __init__(self):
+ config.load_kube_config()
+ self.api_instance = client.CoreV1Api()
+
+ def list_configmaps(self) -> list[str]:
+ cm_list = self.api_instance.list_namespaced_config_map(namespace="default").items
+ return [cm.metadata.name for cm in cm_list]
+
+ def create_code_map(self) -> None:
+ cmap = client.V1ConfigMap()
+ cmap.metadata = client.V1ObjectMeta(name=CMAP_NAME)
+ cmap.data = {"sample_code.py": CMAP_VALUE}
+ self.api_instance.create_namespaced_config_map(namespace="default", body=cmap)
+
+ def delete_code_map(self) -> None:
+ try:
+ self.api_instance.delete_namespaced_config_map(name=CMAP_NAME, namespace="default")
+ except Exception:
+ print(f"config map {CMAP_NAME} does not exist")
diff --git a/kfp/kfp_support_lib/test/kuberay_api_test.py b/kfp/kfp_support_lib/python_apiserver_client/test/kuberay_api_test.py
similarity index 97%
rename from kfp/kfp_support_lib/test/kuberay_api_test.py
rename to kfp/kfp_support_lib/python_apiserver_client/test/kuberay_api_test.py
index b2a444ce3..d4dd12a5e 100644
--- a/kfp/kfp_support_lib/test/kuberay_api_test.py
+++ b/kfp/kfp_support_lib/python_apiserver_client/test/kuberay_api_test.py
@@ -13,8 +13,8 @@
import time
from configmaps import ConfigmapsManager
-from kfp_support.api_server_client import KubeRayAPIs
-from kfp_support.api_server_client.params import (
+from python_apiserver_client import KubeRayAPIs
+from python_apiserver_client.params import (
DEFAULT_WORKER_START_PARAMS,
Cluster,
ClusterSpec,
@@ -30,13 +30,15 @@
WorkerNodeSpec,
)
+server_url = "http://localhost:8080/ray"
+
def test_templates():
"""
Test template
"""
# create API server
- apis = KubeRayAPIs(server_url="http://localhost:8080/ray")
+ apis = KubeRayAPIs(server_url=server_url)
# cleanup
_, _ = apis.delete_compute_template(ns="default", name="default-template")
# create
@@ -81,7 +83,7 @@ def test_cluster():
Test cluster
"""
# create API server
- apis = KubeRayAPIs(server_url="http://localhost:8080/ray")
+ apis = KubeRayAPIs(server_url=server_url)
# cleanup
_, _ = apis.delete_compute_template(ns="default", name="default-template")
_, _ = apis.delete_cluster(ns="default", name="test")
@@ -181,7 +183,7 @@ def test_job_submission():
:return:
"""
# create API server
- apis = KubeRayAPIs(server_url="http://localhost:8080/ray")
+ apis = KubeRayAPIs(server_url=server_url)
# cleanup
_, _ = apis.delete_compute_template(ns="default", name="default-template")
_, _ = apis.delete_cluster(ns="default", name="test-job")
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/README.md b/kfp/kfp_support_lib/src/kfp_support/api_server_client/README.md
deleted file mode 100644
index 423f743a1..000000000
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# KubeRay API server APIs
-
-This is a copy of [Kuberay API server python APIs](https://github.com/ray-project/kuberay/tree/master/clients/python-apiserver-client)
-Because these APIs are not exposed by any PyPi, we added them to the project
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/src/kfp_support/api_server_client/__init__.py b/kfp/kfp_support_lib/src/kfp_support/api_server_client/__init__.py
deleted file mode 100644
index 60cbbc2f2..000000000
--- a/kfp/kfp_support_lib/src/kfp_support/api_server_client/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from kfp_support.api_server_client.kuberay_apis import KubeRayAPIs
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/README.md b/kfp/kfp_support_lib/src/kfp_support/workflow_support/README.md
deleted file mode 100644
index b477e9a42..000000000
--- a/kfp/kfp_support_lib/src/kfp_support/workflow_support/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Workflow Utils
-
-This library provides 3 main classes:
-* KFPUtils - helper utilities for KFP implementations
-* PipelinesUtils - helper class for pipeline management based on KFP client
-* RayRemoteJobs - class supporting Ray remote jobs
-
-## KFPUtils
-
-This class contains a collection of functions useful for KFP pipelines implementation, which include:
-* credentials - get S3 credentials from the environment
-* get_namespace - get the name of the kubernetes namespace we are running in
-* runtime_name - generates unique runtime name
-* dict_to_req - convert dictionary of request parameters to a proper formatted JSON string
-* load_from_json - convert json string to dictionary and exit with error if conversion fails
-
-## PipelinesUtils
-
-This class provides some higher level functionality based on the capabilities of the python KFP client, including"
-* get_experiment_by_name obtains KFP experiment object based on its name
-* get_pipeline_by_name obtains KFP pipeline object based on its name
-* start_pipeline start a pipeline represented by pipeline object in experiment represented by experiment object and a
-dictionary of parameters. It returns kfp run ID
-* wait_pipeline_completion - waits for the completion of the pipeline run with the given ID
-
-## RayRemoteJobs
-
-At the moment there is no "standard" approach for KubeRay remote APIs. There are several options available,
-including [codeflareSDK](https://github.com/project-codeflare/codeflare-sdk/tree/1fe04c3022d98bc286454dea2cd1e31709961bd2/src/codeflare_sdk)
-[KubeRay Python Apis](https://github.com/ray-project/kuberay/tree/master/clients/python-client) and
-[KubeRay API server APIs](https://github.com/ray-project/kuberay/tree/master/clients/python-apiserver-client) to name a few.
-We are using here KubeRay API server APIs, but in order to simplify possible transition to another APIs. this class
-implements 4 high-level methods, that allow to hide the specifics of the particular APIs. This methods are:
-* create_ray_cluster - creates Ray cluster.
-* delete_ray_cluster - deletes Ray cluster.
-* submit_job - submits Ray job to the cluster
-* follow_execution - watching job execution to completion, periodically printing out the job log
-These basic methods can be used as a foundation of any KFP pipeline implementation
-
-## ComponentUtils
-
-This class provides some methods to simplify building pipelines:
-* add_settings_to_component - adds settings to component, including timeout, image_pull_policy and cache strategy
-* set_cos_env_vars_to_component - sets environment variables to support S3
-* default_compute_execution_params - default implementation of compute execution parameters (based on CPU, GPU and memory requirements)
\ No newline at end of file
diff --git a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/__init__.py b/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/__init__.py
deleted file mode 100644
index 166032380..000000000
--- a/kfp/kfp_support_lib/src/kfp_support/workflow_support/utils/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from kfp_support.workflow_support.utils.kfp_utils import KFPUtils
-from kfp_support.workflow_support.utils.pipeline_utils import PipelinesUtils
-from kfp_support.workflow_support.utils.components_utils import ComponentUtils, ONE_HOUR_SEC, ONE_DAY_SEC, ONE_WEEK_SEC
-from kfp_support.workflow_support.utils.remote_jobs_utils import RayRemoteJobs, execute_ray_jobs
diff --git a/kfp/requirements.env b/kfp/requirements.env
index ef5110bcc..6fa707df5 100644
--- a/kfp/requirements.env
+++ b/kfp/requirements.env
@@ -1,2 +1,11 @@
RAY=2.9.3
-KFP=1.8.22
+KFP_v2=2.7.0
+KFP_v1=1.8.22
+
+ifeq ($(KFPv2), 1)
+ KFP=$(KFP_v2)
+ WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support
+else
+ KFP=$(KFP_v1)
+ WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support
+endif
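+# Example (illustrative): select KFP v2 by passing KFPv2=1 to make, e.g. "make KFPv2=1 workflow-build";
+# by default (KFPv2=0) KFP v1 is used.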
diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py
index f63bb0638..a64154237 100644
--- a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py
+++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py
@@ -1,7 +1,7 @@
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import ONE_WEEK_SEC
+from kfp_support.workflow_support.runtime_utils import ONE_WEEK_SEC
# Components
diff --git a/kind/Makefile b/kind/Makefile
index a20a8ea76..da22e24f3 100644
--- a/kind/Makefile
+++ b/kind/Makefile
@@ -2,7 +2,13 @@
# know where they are running from.
export REPOROOT=${CURDIR}/../
-IGNORE := $(shell bash -c "sed -n /=/p ${REPOROOT}/kind/requirements.env | sed 's/=/:=/' | sed 's/^/export /' > makeenv")
+
+ifneq ($(KFPv2), 1)
+ GREP_V=KFP_V2
+else
+ GREP_V=KFP_V1
+endif
+IGNORE := $(shell bash -c "sed -n /=/p ${REPOROOT}/kind/requirements.env | sed 's/=/:=/;/$(GREP_V)/d;s/KFP_V._//;s/^/export /' > makeenv")
include makeenv
diff --git a/kind/README.md b/kind/README.md
index e4a106f25..60fae6841 100644
--- a/kind/README.md
+++ b/kind/README.md
@@ -28,7 +28,10 @@ amount of node, modify [cluster configuration](hack/kind-cluster-config.yaml)
Install [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/v1/installation/standalone-deployment/#deploying-kubeflow-pipelines) and wait for it to be ready:
```shell
-cd $ROOT_DIR/hack/tools/ && PIPELINE_VERSION=1.8.5 ./install_kubeflow.sh deploy && cd -
+# Set the required KFP version. You can find the latest supported versions in the [requirements.env](./requirements.env) file.
+# Currently, we support 1.8.5 for KFP v1 and 2.2.0 for KFP v2.
+export PIPELINE_VERSION=1.8.5
+cd $ROOT_DIR/hack/tools/ && ./install_kubeflow.sh deploy && cd -
kubectl wait --for=condition=ready --all pod -n kubeflow --timeout=300s
```
@@ -56,7 +59,7 @@ kubectl wait --namespace ingress-nginx \
--timeout=90s
```
-To deploy the ingress for ray apiserver, kfp and Minio execute the following:
+To deploy the ingress for Ray API Server, KFP and MinIO execute the following:
```shell
kubectl apply -f $ROOT_DIR/hack/ray_api_server_ingress.yaml
kubectl apply -f $ROOT_DIR/hack/kfp_ingress.yaml
diff --git a/kind/requirements.env b/kind/requirements.env
index 70eca5bd8..cedd6ba0e 100644
--- a/kind/requirements.env
+++ b/kind/requirements.env
@@ -1,4 +1,6 @@
-PIPELINE_VERSION=1.8.5
+KFP_V1_PIPELINE_VERSION=1.8.5
+KFP_V2_PIPELINE_VERSION=2.2.0
+
KUBERAY_OPERATOR=1.0.0
KUBERAY_APISERVER=1.1.0
diff --git a/transforms/.make.workflows b/transforms/.make.workflows
index 4d0d47617..4a9d0d0a8 100644
--- a/transforms/.make.workflows
+++ b/transforms/.make.workflows
@@ -6,23 +6,15 @@ include ${REPOROOT}/kfp/requirements.env
include ${REPOROOT}/.make.defaults
USE_DEV_IMAGES ?= 1
-TRANSFORM_RUNTIME = ray
define set_env_var
$(eval export $(1)=$(2))
endef
+# FIXME
.PHONY: .transforms_workflows.reconcile-requirements
.transforms_workflows.reconcile-requirements:
- cd ${REPOROOT}/kfp/kfp_ray_components && $(MAKE) reconcile-requirements
- @while IFS= read -r line; do \
- [ -z "$$line" ] && continue; \
- [[ $$line == *#* ]] && continue; \
- export DOCKER_IMAGE_NAME=$$(echo $$line |cut -d "=" -f 1 |sed "s/_VERSION//" |tr '[:upper:]' '[:lower:]'); \
- export DOCKER_IMAGE_VERSION=$$(echo $$line |cut -d "=" -f 2); \
- sed -i.back "s/data-prep-kit\/$$DOCKER_IMAGE_NAME\-${TRANSFORM_RUNTIME}:.*/data-prep-kit\/$$DOCKER_IMAGE_NAME\-${TRANSFORM_RUNTIME}:$$DOCKER_IMAGE_VERSION\"/" $$PIPELINE_FILE ;\
- done < ${REPOROOT}/.make.versions
- @sed -i.back "s/kfp-data-processing:.*/kfp-data-processing:${KFP_DOCKER_VERSION}\"/" ${PIPELINE_FILE}
+
.PHONY: .transforms_workflows.compile-pipeline
.transforms_workflows.compile-pipeline:
@@ -45,20 +37,21 @@ ifeq ($(USE_DEV_IMAGES), 1)
cd ${TRANSFORM_SRC} && $(MAKE) image && $(MAKE) load-image
cd ${REPOROOT}/kfp/kfp_ray_components && $(MAKE) image && $(MAKE) load-image
endif
- . ${WORKFLOW_VENV_ACTIVATE} && ${PYTHON} -m kfp_support.workflow_support.utils.pipelines_tests_utils -c "sanity-test" -p ${CURDIR}/${PIPELINE_FILE}
+ . ${WORKFLOW_VENV_ACTIVATE} && ${PYTHON} -m workflow_support.pipeline_utils.pipelines_tests_utils -c "sanity-test" -p ${CURDIR}/${PIPELINE_FILE}
${WORKFLOW_VENV_ACTIVATE}: ${REPOROOT}/.make.versions ${REPOROOT}/kfp/requirements.env ${REPOROOT}/kfp/kfp_ray_components/requirements.txt ${DPK_RAY_LIB_DIR} ${REPOROOT}/kfp/kfp_support_lib/
rm -rf ${REPOROOT}/transforms/venv
$(MAKE) -C ${REPOROOT}/transforms .defaults.python-lib-src-venv
. ${WORKFLOW_VENV_ACTIVATE}; \
- pip install -e $(REPOROOT)/kfp/kfp_support_lib/;
+ pip install -e $(REPOROOT)/kfp/kfp_support_lib/python_apiserver_client; \
+ pip install -e $(REPOROOT)/kfp/kfp_support_lib/$(WORKFLOW_SUPPORT_LIB);
@# Help: Create the virtual environment common to all workflows
-
+
.PHONY: .transforms_workflows.upload-pipeline
.transforms_workflows.upload-pipeline:
$(call set_env_var, CLUSTER_EXISTS, $(shell kind get clusters | grep ${KIND_CLUSTER_NAME}))
@if [ -z ${CLUSTER_EXISTS} ]; then \
cd ${REPOROOT} && make setup; \
fi
- . ${WORKFLOW_VENV_ACTIVATE} && ${PYTHON} -m kfp_support.workflow_support.utils.pipelines_tests_utils -c "upload" -p ${CURDIR}/${PIPELINE_FILE}
+ . ${WORKFLOW_VENV_ACTIVATE} && ${PYTHON} -m workflow_support.pipeline_utils.pipelines_tests_utils -c "upload" -p ${CURDIR}/${PIPELINE_FILE}
diff --git a/transforms/code/code_quality/Makefile b/transforms/code/code_quality/Makefile
index 14c9b098a..5cc85aab2 100644
--- a/transforms/code/code_quality/Makefile
+++ b/transforms/code/code_quality/Makefile
@@ -47,21 +47,21 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/code/code_quality/kfp_ray/Makefile b/transforms/code/code_quality/kfp_ray/Makefile
new file mode 100644
index 000000000..d93c668c1
--- /dev/null
+++ b/transforms/code/code_quality/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+	$(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=code_quality_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/code/code_quality/kfp_ray/v1/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py
similarity index 66%
rename from transforms/code/code_quality/kfp_ray/v1/code_quality_wf.py
rename to transforms/code/code_quality/kfp_ray/code_quality_wf.py
index 09b1e6cb7..b89f74083 100644
--- a/transforms/code/code_quality/kfp_ray/v1/code_quality_wf.py
+++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py
@@ -9,35 +9,83 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
-
-# NOTE: This file is auto generated by Pipeline Generator.
+import os
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
# the name of the job script
EXEC_SCRIPT_NAME: str = "code_quality_transform_ray.py"
PREFIX: str = ""
-task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0"
+task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0.dev6"
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+# path to kfp component specifications files
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ cq_contents_column_name: str,
+ cq_language_column_name: str,
+ cq_tokenizer: str,
+ cq_hf_token: str,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "cq_contents_column_name": cq_contents_column_name,
+ "cq_language_column_name": cq_language_column_name,
+ "cq_tokenizer": cq_tokenizer,
+ "cq_hf_token": cq_hf_token,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -120,7 +168,7 @@ def code_quality(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -128,7 +176,18 @@ def code_quality(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ cq_contents_column_name=cq_contents_column_name,
+ cq_language_column_name=cq_language_column_name,
+ cq_tokenizer=cq_tokenizer,
+ cq_hf_token=cq_hf_token,
)
+
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
ray_cluster = create_ray_op(
@@ -147,19 +206,8 @@ def code_quality(
ray_name=ray_name,
run_id=dsl.RUN_ID_PLACEHOLDER,
additional_params=additional_params,
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "cq_contents_column_name": cq_contents_column_name,
- "cq_language_column_name": cq_language_column_name,
- "cq_tokenizer": cq_tokenizer,
- "cq_hf_token": cq_hf_token,
- },
+    # note that the parameters below are specific for the code quality transform
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
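
The KFPv1/KFPv2 switch introduced above is repeated verbatim in every reworked workflow in this patch. As a reference, here is a minimal standalone sketch of the pattern; the parameter function body and the base image are hypothetical placeholders, not code from this repository:

import os
import uuid

import kfp.components as comp
import kfp.dsl as dsl


def compute_exec_params_func(worker_options: str, actor_options: str) -> dict:
    # hypothetical per-transform parameter computation
    return {"runtime_worker_options": actor_options}


if os.getenv("KFPv2", "0") == "1":
    # KFPv2 path: wrap the function with the component decorator and bake a
    # run id into the compiled pipeline at compilation time.
    compute_exec_params_op = dsl.component_decorator.component(
        func=compute_exec_params_func, base_image="python:3.10"
    )
    run_id = uuid.uuid4().hex
else:
    # KFPv1 path: create the component from the function and use the per-run
    # placeholder resolved by the KFP backend.
    compute_exec_params_op = comp.create_component_from_func(
        func=compute_exec_params_func, base_image="python:3.10"
    )
    run_id = dsl.RUN_ID_PLACEHOLDER
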
diff --git a/transforms/code/code_quality/kfp_ray/v1/Makefile b/transforms/code/code_quality/kfp_ray/v1/Makefile
deleted file mode 100644
index ae484ed12..000000000
--- a/transforms/code/code_quality/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=code_quality_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=code_quality_wf.py
diff --git a/transforms/code/malware/Makefile b/transforms/code/malware/Makefile
index 14c9b098a..41413c041 100644
--- a/transforms/code/malware/Makefile
+++ b/transforms/code/malware/Makefile
@@ -47,21 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
-
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/code/malware/kfp_ray/Makefile b/transforms/code/malware/kfp_ray/Makefile
new file mode 100644
index 000000000..ce744072d
--- /dev/null
+++ b/transforms/code/malware/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=malware_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/code/malware/kfp_ray/v1/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py
similarity index 67%
rename from transforms/code/malware/kfp_ray/v1/malware_wf.py
rename to transforms/code/malware/kfp_ray/malware_wf.py
index 7f65b3a9c..d0e22643b 100644
--- a/transforms/code/malware/kfp_ray/v1/malware_wf.py
+++ b/transforms/code/malware/kfp_ray/malware_wf.py
@@ -10,32 +10,78 @@
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
+
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
# the name of the job script
EXEC_SCRIPT_NAME: str = "malware_transform_ray.py"
-task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0"
+task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0.dev6"
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ malware_input_column: str,
+ malware_output_column: str,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "malware_input_column": malware_input_column,
+ "malware_output_column": malware_output_column,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -107,7 +153,7 @@ def malware(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -115,6 +161,14 @@ def malware(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ malware_input_column=malware_input_column,
+ malware_output_column=malware_output_column,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
@@ -134,18 +188,7 @@ def malware(
run_id=dsl.RUN_ID_PLACEHOLDER,
additional_params=additional_params,
# note that the parameters below are specific for malware transform
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "malware_input_column": malware_input_column,
- "malware_output_column": malware_output_column,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
diff --git a/transforms/code/malware/kfp_ray/v1/Makefile b/transforms/code/malware/kfp_ray/v1/Makefile
deleted file mode 100644
index d673ca682..000000000
--- a/transforms/code/malware/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=malware_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=malware_wf.py
diff --git a/transforms/code/proglang_select/Makefile b/transforms/code/proglang_select/Makefile
index 4991e4002..e7ad671da 100644
--- a/transforms/code/proglang_select/Makefile
+++ b/transforms/code/proglang_select/Makefile
@@ -47,20 +47,21 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
+
diff --git a/transforms/code/proglang_select/kfp_ray/Makefile b/transforms/code/proglang_select/kfp_ray/Makefile
new file mode 100644
index 000000000..2bdfb2d1d
--- /dev/null
+++ b/transforms/code/proglang_select/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=proglang_select_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/code/proglang_select/kfp_ray/v1/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
similarity index 68%
rename from transforms/code/proglang_select/kfp_ray/v1/proglang_select_wf.py
rename to transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
index 14b17ac32..ad256903f 100644
--- a/transforms/code/proglang_select/kfp_ray/v1/proglang_select_wf.py
+++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py
@@ -10,32 +10,78 @@
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
+
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
# the name of the job script
EXEC_SCRIPT_NAME: str = "proglang_select_transform_ray.py"
-task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0"
+task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0.dev6"
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
+
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ proglang_select_allowed_langs_file: str,
+ proglang_select_language_column: str,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "proglang_select_allowed_langs_file": proglang_select_allowed_langs_file,
+ "proglang_select_language_column": proglang_select_language_column,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -111,7 +157,7 @@ def lang_select(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -119,6 +165,14 @@ def lang_select(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ proglang_select_allowed_langs_file=proglang_select_allowed_langs_file,
+ proglang_select_language_column=proglang_select_language_column,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
@@ -137,19 +191,8 @@ def lang_select(
ray_name=ray_name,
run_id=dsl.RUN_ID_PLACEHOLDER,
additional_params=additional_params,
- # note that the parameters below are specific for NOOP transform
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "proglang_select_allowed_langs_file": proglang_select_allowed_langs_file,
- "proglang_select_language_column": proglang_select_language_column,
- },
+ # note that the parameters below are specific for this transform
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
prefix=PREFIX,
diff --git a/transforms/code/proglang_select/kfp_ray/v1/Makefile b/transforms/code/proglang_select/kfp_ray/v1/Makefile
deleted file mode 100644
index e2c8c8b14..000000000
--- a/transforms/code/proglang_select/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=proglang_select_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=proglang_select_wf.py
diff --git a/transforms/universal/doc_id/Makefile b/transforms/universal/doc_id/Makefile
index b7c9b04b3..da86986db 100644
--- a/transforms/universal/doc_id/Makefile
+++ b/transforms/universal/doc_id/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/doc_id/kfp_ray/Makefile b/transforms/universal/doc_id/kfp_ray/Makefile
new file mode 100644
index 000000000..54b7e3781
--- /dev/null
+++ b/transforms/universal/doc_id/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=doc_id_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/universal/doc_id/kfp_ray/v1/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py
similarity index 63%
rename from transforms/universal/doc_id/kfp_ray/v1/doc_id_wf.py
rename to transforms/universal/doc_id/kfp_ray/doc_id_wf.py
index d7cbb11dd..5cbb3e974 100644
--- a/transforms/universal/doc_id/kfp_ray/v1/doc_id_wf.py
+++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py
@@ -9,40 +9,91 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
-task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0"
+task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6"
# the name of the job script
EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py"
-
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ data_checkpointing: bool,
+ data_data_sets: str,
+ data_files_to_use: str,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ doc_id_doc_column: str,
+ doc_id_hash_column: str,
+ doc_id_int_column: str,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "data_checkpointing": data_checkpointing,
+ "data_data_sets": data_data_sets,
+ "data_files_to_use": data_files_to_use,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "doc_id_doc_column": doc_id_doc_column,
+ "doc_id_hash_column": doc_id_hash_column,
+ "doc_id_int_column": doc_id_int_column,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
execute_ray_jobs_op = comp.load_component_from_file(component_spec_path + "executeRayJobComponent.yaml")
# clean up Ray
cleanup_ray_op = comp.load_component_from_file(component_spec_path + "deleteRayClusterComponent.yaml")
-
# Task name is part of the pipeline name, the ray cluster name and the job name in DMF.
TASK_NAME: str = "doc_id"
@@ -114,7 +165,7 @@ def doc_id(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -122,12 +173,24 @@ def doc_id(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ data_checkpointing=data_checkpointing,
+ data_data_sets=data_data_sets,
+ data_files_to_use=data_files_to_use,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ doc_id_doc_column=doc_id_doc_column,
+ doc_id_hash_column=doc_id_hash_column,
+ doc_id_int_column=doc_id_int_column,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
ray_head_options=ray_head_options,
ray_worker_options=ray_worker_options,
server_url=server_url,
@@ -138,25 +201,10 @@ def doc_id(
# Execute job
execute_job = execute_ray_jobs_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
additional_params=additional_params,
# note that the parameters below are specific for NOOP transform
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "data_checkpointing": data_checkpointing,
- "data_data_sets": data_data_sets,
- "data_files_to_use": data_files_to_use,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "doc_id_doc_column": doc_id_doc_column,
- "doc_id_hash_column": doc_id_hash_column,
- "doc_id_int_column": doc_id_int_column,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
@@ -164,8 +212,9 @@ def doc_id(
ComponentUtils.set_s3_env_vars_to_component(execute_job, data_s3_access_secret)
execute_job.after(ray_cluster)
+ # TODO
# Configure the pipeline level to one week (in seconds)
- dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
+ # dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
if __name__ == "__main__":
diff --git a/transforms/universal/doc_id/kfp_ray/v1/Makefile b/transforms/universal/doc_id/kfp_ray/v1/Makefile
deleted file mode 100644
index e33049af4..000000000
--- a/transforms/universal/doc_id/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=doc_id_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=doc_id_wf.py
diff --git a/transforms/universal/ededup/Makefile b/transforms/universal/ededup/Makefile
index 7735c3251..a766f453e 100644
--- a/transforms/universal/ededup/Makefile
+++ b/transforms/universal/ededup/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
-	$(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+	$(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/ededup/kfp_ray/Makefile b/transforms/universal/ededup/kfp_ray/Makefile
new file mode 100644
index 000000000..235258fd6
--- /dev/null
+++ b/transforms/universal/ededup/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=ededup_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/universal/ededup/kfp_ray/v1/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py
similarity index 74%
rename from transforms/universal/ededup/kfp_ray/v1/ededup_wf.py
rename to transforms/universal/ededup/kfp_ray/ededup_wf.py
index 89682fe6c..6297470e9 100644
--- a/transforms/universal/ededup/kfp_ray/v1/ededup_wf.py
+++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py
@@ -9,31 +9,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from src.ededup_compute_execution_params import ededup_compute_execution_params
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
-from src.ededup_compute_execution_params import ededup_compute_execution_params
+task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6"
+
# the name of the job script
EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py"
-task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0"
-
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+    compute_exec_params_op = dsl.component_decorator.component(
+        func=ededup_compute_execution_params, base_image=base_kfp_image
+    )
+    print(
+        "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+        "same version of the same pipeline !!!")
+    run_id = uuid.uuid4().hex
+else:
+    compute_exec_params_op = comp.create_component_from_func(
+        func=ededup_compute_execution_params, base_image=base_kfp_image
+    )
+    run_id = dsl.RUN_ID_PLACEHOLDER
-# compute execution parameters
-compute_exec_params_op = comp.func_to_container_op(func=ededup_compute_execution_params, base_image=base_kfp_image)
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -110,7 +130,7 @@ def ededup(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -118,7 +138,14 @@ def ededup(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
- params={"s3_config": data_s3_config, "hash_cpu": ededup_hash_cpu},
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ doc_column=ededup_doc_column,
+ hash_cpu=ededup_hash_cpu,
n_samples=ededup_n_samples,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
@@ -127,7 +154,7 @@ def ededup(
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
ray_head_options=ray_head_options,
ray_worker_options=ray_worker_options,
server_url=server_url,
@@ -138,21 +165,9 @@ def ededup(
# Execute job
execute_job = execute_ray_jobs_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
additional_params=additional_params,
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.outputs["workers"],
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "ededup_doc_column": ededup_doc_column,
- "ededup_hash_cpu": ededup_hash_cpu,
- "ededup_num_hashes": compute_exec_params.outputs["hashes"],
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
@@ -160,8 +175,9 @@ def ededup(
ComponentUtils.set_s3_env_vars_to_component(execute_job, data_s3_access_secret)
execute_job.after(ray_cluster)
+ # TODO
# Configure the pipeline level to one week (in seconds)
- dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
+ # dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
if __name__ == "__main__":
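
One consequence of the rewrite in the hunks above: the ededup parameter component used to return a NamedTuple whose fields the pipeline unpacked individually, and it now returns a single dict that is forwarded as the job's exec_params. A plain-Python illustration of the two shapes (no KFP objects involved; the values are invented):

from typing import NamedTuple

# old shape: separate outputs, addressed as compute_exec_params.outputs["workers"] / ["hashes"]
class OldOutput(NamedTuple):
    workers: int
    hashes: int

old = OldOutput(workers=5, hashes=3)

# new shape: one dict carrying every execution parameter, forwarded downstream
# as exec_params=compute_exec_params.output
new = {
    "runtime_num_workers": 5,
    "ededup_num_hashes": 3,
    # ...plus the data_*, runtime_* and ededup_* keys built inside the component...
}

print(old.workers, old.hashes)
print(new["runtime_num_workers"], new["ededup_num_hashes"])
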
diff --git a/transforms/universal/ededup/kfp_ray/v1/src/ededup_compute_execution_params.py b/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py
similarity index 71%
rename from transforms/universal/ededup/kfp_ray/v1/src/ededup_compute_execution_params.py
rename to transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py
index 529a6ace3..16a5a0c28 100644
--- a/transforms/universal/ededup/kfp_ray/v1/src/ededup_compute_execution_params.py
+++ b/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py
@@ -10,24 +10,37 @@
# limitations under the License.
################################################################################
-from typing import Any, NamedTuple
+from typing import Any
def ededup_compute_execution_params(
worker_options: str, # ray worker configuration
actor_options: str, # actor's resource requirements
- params: dict[str, Any], # exact dedup specific parameters
- n_samples: int = 10, # number of samples to use
-) -> NamedTuple("Output", [("workers", int), ("hashes", int)]):
+ data_s3_config: str, # s3 configuration
+ data_max_files: int, # max files to process
+ data_num_samples: int, # num samples to process
+ runtime_pipeline_id: str, # pipeline id
+ runtime_job_id: str, # job id
+ runtime_code_location: str, # code location
+ doc_column: str, # key for accessing data
+ hash_cpu: float, # number of CPUs per hash
+ n_samples: int, # number of samples for parameters computation
+) -> dict:
"""
Compute exact dedup execution parameters
:param worker_options: cluster parameters
:param actor_options: actor request requirements
:param n_samples: number of samples to use
- :param params: exact dedup specific parameters containing the following keys:
- s3_config - s3 config
- hash_cpu - hash cpu requirements
- :return: json string, containing computed number of workers and hashes
+ :param data_s3_config - s3 config
+ :param data_max_files - max files to process
+ :param data_num_samples - num samples to process
+ :param runtime_pipeline_id - pipeline id
+ :param runtime_job_id - job id, or just a unique string
+ :param runtime_code_location - code location
+ :param doc_column - key for accessing data
+ :param hash_cpu - number of CPUs per hash
+    :param n_samples - number of samples for parameters computation
+    :return: a dictionary with the Ray job execution parameters
"""
# required import
import math
@@ -35,7 +48,7 @@ def ededup_compute_execution_params(
from data_processing.data_access import DataAccessS3
from data_processing.utils import GB, KB
- from kfp_support.workflow_support.utils import KFPUtils
+ from workflow_support.runtime_utils import KFPUtils
EXECUTION_OF_KB_DOC = 0.00025
@@ -53,7 +66,7 @@ def ededup_compute_execution_params(
# get credentials
s3_key, s3_secret, s3_endpoint = KFPUtils.credentials()
s3_creds = {"access_key": s3_key, "secret_key": s3_secret, "url": s3_endpoint}
- s3_config = KFPUtils.load_from_json(params.get("s3_config", {}).replace("'", '"'))
+ s3_config = KFPUtils.load_from_json(data_s3_config.replace("'", '"'))
if type(s3_config) is list:
# S3 config is list. take the first element
s3_config = s3_config[0]
@@ -71,7 +84,6 @@ def ededup_compute_execution_params(
n_hashes = math.ceil(number_of_docs * 32 / GB)
print(f"Estimated Required hashes {n_hashes}")
print(f"Cluster available CPUs {cluster_cpu}, Memory {cluster_memory}")
- hash_cpu: float = float(params.get("hash_cpu"))
required_hash_cpu = n_hashes * hash_cpu
required_hash_mem = n_hashes * 2
if required_hash_cpu > cluster_cpu or required_hash_mem > cluster_memory:
@@ -97,6 +109,16 @@ def ededup_compute_execution_params(
print(f"Try to increase the size of the cluster or increase size of the cpu per worker")
sys.exit(1)
print(f"Projected execution time {EXECUTION_OF_KB_DOC * avg_doc_size * number_of_docs / n_workers / 60} min")
- # return json.dumps({"workers": n_workers, "hashes": n_hashes})
- return (n_workers, n_hashes)
- # return (1, 1)
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": n_workers,
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "ededup_doc_column": doc_column,
+ "ededup_hash_cpu": hash_cpu,
+ "ededup_num_hashes": n_hashes,
+ }
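
For orientation, a rough worked example of the hash-sizing arithmetic visible in the hunk above; the input numbers are made up for illustration and are not repository defaults (the code appears to budget roughly 32 bytes of hash state per document and 2 memory units, presumably GiB, per hash actor):

import math

GB = 1024 * 1024 * 1024

# illustrative inputs only
number_of_docs = 100_000_000  # estimated documents in the data set
hash_cpu = 0.5                # CPUs requested per hash actor
cluster_cpu = 80              # CPUs available in the Ray cluster
cluster_memory = 600          # memory available in the Ray cluster (GiB)

n_hashes = math.ceil(number_of_docs * 32 / GB)  # -> 3 hash actors
required_hash_cpu = n_hashes * hash_cpu         # -> 1.5 CPUs
required_hash_mem = n_hashes * 2                # -> 6 (GiB)

# mirrors the check in the hunk above: bail out if the cluster cannot host the hash actors
if required_hash_cpu > cluster_cpu or required_hash_mem > cluster_memory:
    raise SystemExit("cluster too small for the required hash actors")
print(n_hashes, required_hash_cpu, required_hash_mem)
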
diff --git a/transforms/universal/ededup/kfp_ray/v1/Makefile b/transforms/universal/ededup/kfp_ray/v1/Makefile
deleted file mode 100644
index 66331ebfb..000000000
--- a/transforms/universal/ededup/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=ededup_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=ededup_wf.py
diff --git a/transforms/universal/fdedup/Makefile b/transforms/universal/fdedup/Makefile
index 7735c3251..41413c041 100644
--- a/transforms/universal/fdedup/Makefile
+++ b/transforms/universal/fdedup/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/fdedup/kfp_ray/Makefile b/transforms/universal/fdedup/kfp_ray/Makefile
new file mode 100644
index 000000000..f741801bc
--- /dev/null
+++ b/transforms/universal/fdedup/kfp_ray/Makefile
@@ -0,0 +1,40 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=fdedup_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/universal/fdedup/kfp_ray/v1/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py
similarity index 73%
rename from transforms/universal/fdedup/kfp_ray/v1/fdedup_wf.py
rename to transforms/universal/fdedup/kfp_ray/fdedup_wf.py
index d0feea492..c3e21a85b 100644
--- a/transforms/universal/fdedup/kfp_ray/v1/fdedup_wf.py
+++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py
@@ -9,31 +9,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from src.fdedup_compute_execution_params import fdedup_compute_execution_params
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
-from src.fdedup_compute_execution_params import fdedup_compute_execution_params
+task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6"
+
# the name of the job script
EXEC_SCRIPT_NAME: str = "fdedup_transform_ray.py"
-task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0"
-
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which is deprecated in KFPv2 and has a different import path there.
+# KFPv2 recommends the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+    # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+    # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+    # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for the time being we use a unique string created
+    # at compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=fdedup_compute_execution_params, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(
+ func=fdedup_compute_execution_params, base_image=base_kfp_image
+ )
+ run_id = dsl.RUN_ID_PLACEHOLDER
-# compute execution parameters
-compute_exec_params_op = comp.func_to_container_op(func=fdedup_compute_execution_params, base_image=base_kfp_image)
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -139,7 +159,7 @@ def fdedup(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -147,14 +167,26 @@ def fdedup(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
- params={
- "threshold": fdedup_threshold,
- "num_permutations": fdedup_num_permutations,
- "s3_config": data_s3_config,
- "bucket_cpu": fdedup_bucket_cpu,
- "doc_cpu": fdedup_doc_cpu,
- "minhash_cpu": fdedup_mhash_cpu,
- },
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ doc_column=fdedup_doc_column,
+ id_column=fdedup_id_column,
+ cluster_column=fdedup_cluster_column,
+ bucket_cpu=fdedup_bucket_cpu,
+ doc_cpu=fdedup_doc_cpu,
+ mhash_cpu=fdedup_mhash_cpu,
+ num_permutations=fdedup_num_permutations,
+ threshold=fdedup_threshold,
+ shingles_size=fdedup_shingles_size,
+ delimiters=fdedup_delimiters,
+ random_delay_limit=fdedup_random_delay_limit,
+ snapshot_delay=fdedup_snapshot_delay,
+ use_doc_snapshot=fdedup_use_doc_snapshot,
+ use_bucket_snapshot=fdedup_use_bucket_snapshot,
n_samples=fdedup_n_samples,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
@@ -163,7 +195,7 @@ def fdedup(
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
ray_head_options=ray_head_options,
ray_worker_options=ray_worker_options,
server_url=server_url,
@@ -174,36 +206,9 @@ def fdedup(
# Execute job
execute_job = execute_ray_jobs_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
additional_params=additional_params,
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.outputs["workers"],
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "fdedup_doc_column": fdedup_doc_column,
- "fdedup_id_column": fdedup_id_column,
- "fdedup_cluster_column": fdedup_cluster_column,
- "fdedup_bucket_cpu": fdedup_bucket_cpu,
- "fdedup_doc_cpu": fdedup_doc_cpu,
- "fdedup_mhash_cpu": fdedup_mhash_cpu,
- "fdedup_num_doc_actors": compute_exec_params.outputs["docs"],
- "fdedup_num_bucket_actors": compute_exec_params.outputs["buckets"],
- "fdedup_num_minhash_actors": compute_exec_params.outputs["min_hashes"],
- "fdedup_num_preprocessors": compute_exec_params.outputs["preprocessors"],
- "fdedup_num_permutations": fdedup_num_permutations,
- "fdedup_threshold": fdedup_threshold,
- "fdedup_shingles_size": fdedup_shingles_size,
- "fdedup_delimiters": fdedup_delimiters,
- "fdedup_random_delay_limit": fdedup_random_delay_limit,
- "fdedup_snapshot_delay": fdedup_snapshot_delay,
- "fdedup_use_doc_snapshot": fdedup_use_doc_snapshot,
- "fdedup_use_bucket_snapshot": fdedup_use_bucket_snapshot,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
@@ -211,8 +216,9 @@ def fdedup(
ComponentUtils.set_s3_env_vars_to_component(execute_job, data_s3_access_secret)
execute_job.after(ray_cluster)
+ # TODO
# Configure the pipeline level to one week (in seconds)
- dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
+ # dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
if __name__ == "__main__":
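For reference, the explicit call in the KFPv2 branch above is simply the non-decorator spelling of the `@dsl.component` form that the comment mentions. A minimal KFPv2-only sketch of the equivalence (the function body and the `python:3.10` base image are illustrative, not the repo's):

```python
# KFPv2-only sketch: the two spellings below build the same lightweight
# component. "python:3.10" is an illustrative base image, not the repo's
# kfp-data-processing image.
from kfp import dsl


def compute_params(worker_options: str, actor_options: str) -> dict:
    return {"runtime_worker_options": actor_options, "runtime_num_workers": 1}


# decorator form recommended by KFPv2 (unavailable in KFPv1, hence the
# explicit call inside the if/else in the workflow files of this PR)
@dsl.component(base_image="python:3.10")
def compute_params_decorated(worker_options: str, actor_options: str) -> dict:
    return {"runtime_worker_options": actor_options, "runtime_num_workers": 1}


# explicit call; dsl.component is the same function the workflow files reach
# via dsl.component_decorator.component
compute_params_op = dsl.component(func=compute_params, base_image="python:3.10")
```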
diff --git a/transforms/universal/fdedup/kfp_ray/v1/src/fdedup_compute_execution_params.py b/transforms/universal/fdedup/kfp_ray/src/fdedup_compute_execution_params.py
similarity index 62%
rename from transforms/universal/fdedup/kfp_ray/v1/src/fdedup_compute_execution_params.py
rename to transforms/universal/fdedup/kfp_ray/src/fdedup_compute_execution_params.py
index a9f8b8d66..9d07940c1 100644
--- a/transforms/universal/fdedup/kfp_ray/v1/src/fdedup_compute_execution_params.py
+++ b/transforms/universal/fdedup/kfp_ray/src/fdedup_compute_execution_params.py
@@ -16,37 +16,65 @@
def fdedup_compute_execution_params(
worker_options: str, # ray worker configuration
actor_options: str, # actor's resource requirements
- params: dict[str, Any], # fuzzy dedup specific parameters
- n_samples: int = 10, # number of samples to use
-) -> NamedTuple(
- "Output", [("workers", int), ("preprocessors", int), ("docs", int), ("buckets", int), ("min_hashes", int)]
-):
+ data_s3_config: str, # s3 configuration
+ data_max_files: int, # max files to process
+ data_num_samples: int, # num samples to process
+ runtime_pipeline_id: str, # pipeline id
+ runtime_job_id: str, # job id
+ runtime_code_location: str, # code location
+ doc_column: str, # document column name
+ id_column: str, # integer document id column name
+ cluster_column: str, # cluster column name
+ bucket_cpu: float, # number of CPUs per bucket hash
+ doc_cpu: float, # number of CPUs per doc hash
+ mhash_cpu: float, # number of CPUs per minhash hash
+ num_permutations: int, # number of permutations
+ threshold: float,  # threshold for fuzzy computations
+ shingles_size: int, # number of words in shingle
+ delimiters: str, # delimiter for splitting document
+ random_delay_limit: int, # delay between reads to reduce S3 load.
+ # A random number between 0 and random_delay_limit is used
+ snapshot_delay: int, # delay between restoring individual actors
+ use_doc_snapshot: bool, # flag to skip documents building and start from existing snapshots
+ use_bucket_snapshot: bool, # flag to skip buckets building and start from existing snapshots
+ n_samples: int, # number of samples to use
+) -> dict: # NamedTuple(
+ # "Output", [("workers", int), ("preprocessors", int), ("docs", int), ("buckets", int), ("min_hashes", int)]
+
"""
Compute fuzzy dedup execution parameters
:param worker_options: cluster parameters
:param actor_options: actor request requirements
+ :param data_s3_config: s3 configuration
+ :param data_max_files: max files to process
+ :param data_num_samples: num samples to process
+ :param runtime_pipeline_id: pipeline id
+ :param runtime_job_id: job id
+ :param runtime_code_location: code location
+ :param doc_column: document column name
+ :param id_column: integer document id column name
+ :param cluster_column: cluster column name
+ :param bucket_cpu: number of CPUs per bucket hash
+ :param doc_cpu: number of CPUs per doc hash
+ :param mhash_cpu: number of CPUs per minhash hash
+ :param num_permutations: number of permutations
+ :param threshold: threshold for fuzzy computations
+ :param shingles_size: number of words in shingle
+ :param delimiters: delimiter for splitting document
+ :param random_delay_limit: delay between reads to reduce S3 load. A random number between 0 and random_delay_limit is used
+ :param snapshot_delay: delay between restoring individual actors
+ :param use_doc_snapshot: flag to skip documents building and start from existing snapshots
+ :param use_bucket_snapshot: flag to skip buckets building and start from existing snapshots
:param n_samples: number of samples to use
- :param params: fuzzy dedup specific parameters containing the following keys:
- threshold - threshold for fuzzy computations
- num_permutations - number of permutation
- s3_config - s3 config
- bucket_cpu - bucket actor cpu requirements
- minhash_cpu - minhash actor cpu requirements
- doc_cpu - doc actor cpu requirements
- :return: json string, containing
- workers - number of workers
- preprocessors - number of preprocessors
- docs - number of doc actors
- buckets - number of bucket actors
- min_hashes - number of minhash actors
+ :return: a dictionary with a Ray Job execution parameters
"""
import math
import sys
from data_processing.data_access import DataAccessS3
from data_processing.utils import GB, KB
- from kfp_support.workflow_support.utils import KFPUtils
from scipy.integrate import quad as integrate
+ from workflow_support.runtime_utils import KFPUtils
EXECUTION_OF_KB_DOC = 0.003
@@ -104,8 +132,8 @@ def _false_negative_probability(ths: float, b: int, r: int) -> float:
# fuzzy parameters
num_buckets, length_bucket = fuzzy_optimal_param(
- threshold=float(params.get("threshold")),
- num_perm=int(params.get("num_permutations")),
+ threshold=threshold,
+ num_perm=num_permutations,
false_positive_weight=0.5,
false_negative_weight=0.5,
)
@@ -124,7 +152,7 @@ def _false_negative_probability(ths: float, b: int, r: int) -> float:
# get credentials
s3_key, s3_secret, s3_endpoint = KFPUtils.credentials()
s3_creds = {"access_key": s3_key, "secret_key": s3_secret, "url": s3_endpoint}
- s3_config = KFPUtils.load_from_json(params.get("s3_config", {}).replace("'", '"'))
+ s3_config = KFPUtils.load_from_json(data_s3_config.replace("'", '"'))
if type(s3_config) is list:
# S3 config is list. take the first element
s3_config = s3_config[0]
@@ -143,13 +171,10 @@ def _false_negative_probability(ths: float, b: int, r: int) -> float:
d_actors = math.ceil(number_of_docs * 48 * 1.1 / GB)
m_actors = math.ceil(number_of_docs * 128 * 1.1 / GB)
# compute cpu requirements
- bucket_cpu = float(params.get("bucket_cpu"))
- min_hash_cpu = float(params.get("minhash_cpu"))
- doc_cpu = float(params.get("doc_cpu"))
# Define number of preprocessors. We are assuming that preprocessors and workers are using the same amount
# of CPUs
n_preprocessors = int(
- (0.85 * cluster_cpu - b_actors * bucket_cpu - m_actors * min_hash_cpu - d_actors * doc_cpu) / actor_cpu
+ (0.85 * cluster_cpu - b_actors * bucket_cpu - m_actors * mhash_cpu - d_actors * doc_cpu) / actor_cpu
)
if n_preprocessors < 0:
print(f"Not enough CPUs to run fuzzy de duping, computed number of workers is {n_preprocessors}")
@@ -176,9 +201,36 @@ def _false_negative_probability(ths: float, b: int, r: int) -> float:
print(
f"Required cpu : "
- f"{b_actors * bucket_cpu + m_actors * min_hash_cpu + d_actors * doc_cpu + n_workers * actor_cpu}"
+ f"{b_actors * bucket_cpu + m_actors * mhash_cpu + d_actors * doc_cpu + n_workers * actor_cpu}"
)
projected_execution = EXECUTION_OF_KB_DOC * avg_doc_size * number_of_docs / n_workers / 60
print(f"Projected execution time {projected_execution} min")
- return (n_workers, n_preprocessors, d_actors, b_actors, m_actors)
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": n_workers,
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "fdedup_doc_column": doc_column,
+ "fdedup_id_column": id_column,
+ "fdedup_cluster_column": cluster_column,
+ "fdedup_bucket_cpu": bucket_cpu,
+ "fdedup_doc_cpu": doc_cpu,
+ "fdedup_mhash_cpu": mhash_cpu,
+ "fdedup_num_doc_actors": d_actors,
+ "fdedup_num_bucket_actors": b_actors,
+ "fdedup_num_minhash_actors": m_actors,
+ "fdedup_num_preprocessors": n_preprocessors,
+ "fdedup_num_permutations": num_permutations,
+ "fdedup_threshold": threshold,
+ "fdedup_shingles_size": shingles_size,
+ "fdedup_delimiters": delimiters,
+ "fdedup_random_delay_limit": random_delay_limit,
+ "fdedup_snapshot_delay": snapshot_delay,
+ "fdedup_use_doc_snapshot": use_doc_snapshot,
+ "fdedup_use_bucket_snapshot": use_bucket_snapshot,
+ }
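The `num_buckets`/`length_bucket` pair consumed above comes from `fuzzy_optimal_param`, which this diff leaves untouched. For readers without the file open, a hedged sketch of that style of search, modeled on the widely used datasketch MinHashLSH parameter optimization (the repo's helper may differ in details):

```python
# Sketch of an LSH band/row search of the kind fuzzy_optimal_param performs:
# choose (bands b, rows r) with b * r <= num_perm that minimizes a weighted
# sum of false-positive and false-negative probabilities at the threshold.
from scipy.integrate import quad as integrate


def _false_positive_probability(threshold: float, b: int, r: int) -> float:
    # probability that a pair below the threshold still collides in some band
    probability = lambda s: 1 - (1 - s ** float(r)) ** float(b)
    area, _ = integrate(probability, 0.0, threshold)
    return area


def _false_negative_probability(threshold: float, b: int, r: int) -> float:
    # probability that a pair above the threshold never collides
    probability = lambda s: 1 - (1 - (1 - s ** float(r)) ** float(b))
    area, _ = integrate(probability, threshold, 1.0)
    return area


def fuzzy_optimal_param(threshold: float, num_perm: int,
                        false_positive_weight: float, false_negative_weight: float) -> tuple[int, int]:
    min_error = float("inf")
    opt = (0, 0)
    for b in range(1, num_perm + 1):
        for r in range(1, num_perm // b + 1):
            fp = _false_positive_probability(threshold, b, r)
            fn = _false_negative_probability(threshold, b, r)
            error = fp * false_positive_weight + fn * false_negative_weight
            if error < min_error:
                min_error, opt = error, (b, r)
    return opt


print(fuzzy_optimal_param(threshold=0.8, num_perm=64,
                          false_positive_weight=0.5, false_negative_weight=0.5))
```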
diff --git a/transforms/universal/fdedup/kfp_ray/v1/Makefile b/transforms/universal/fdedup/kfp_ray/v1/Makefile
deleted file mode 100644
index 8a82e5d18..000000000
--- a/transforms/universal/fdedup/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=fdedup_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=fdedup_wf.py
diff --git a/transforms/universal/filter/Makefile b/transforms/universal/filter/Makefile
index 4991e4002..6104574ea 100644
--- a/transforms/universal/filter/Makefile
+++ b/transforms/universal/filter/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/filter/kfp_ray/Makefile b/transforms/universal/filter/kfp_ray/Makefile
new file mode 100644
index 000000000..4d8779a25
--- /dev/null
+++ b/transforms/universal/filter/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=filter_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
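The `PYTHON_WF`/`YAML_WF` pattern above drives every per-transform workflow target after this change. A small Python sketch of the same discovery and name mapping, assuming it runs from a transform's `kfp_ray` directory:

```python
# Sketch of the Makefile's wildcard logic: every *_wf.py below the kfp_ray
# directory maps to a same-named *.yaml compilation target.
from pathlib import Path

python_wf = sorted(Path(".").rglob("*_wf.py"))
yaml_wf = [p.with_suffix(".yaml") for p in python_wf]
for src, out in zip(python_wf, yaml_wf):
    print(f"{src} -> {out}")  # e.g. filter_wf.py -> filter_wf.yaml
```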
diff --git a/transforms/universal/filter/kfp_ray/v1/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py
similarity index 67%
rename from transforms/universal/filter/kfp_ray/v1/filter_wf.py
rename to transforms/universal/filter/kfp_ray/filter_wf.py
index d4a413dab..90d2b197b 100644
--- a/transforms/universal/filter/kfp_ray/v1/filter_wf.py
+++ b/transforms/universal/filter/kfp_ray/filter_wf.py
@@ -10,35 +10,80 @@
# limitations under the License.
################################################################################
-# NOTE: This file is auto generated by Pipeline Generator.
+import os
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
# the name of the job script
EXEC_SCRIPT_NAME: str = "filter_transform_ray.py"
PREFIX: str = ""
-task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.4.0"
+task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.4.0.dev6"
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ filter_criteria_list: str,
+ filter_logical_operator: str,
+ filter_columns_to_drop: str,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "filter_criteria_list": filter_criteria_list,
+ "filter_logical_operator": filter_logical_operator,
+ "filter_columns_to_drop": filter_columns_to_drop,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which KFPv2 deprecates and exposes under a different import path.
+# KFPv2 recommends using the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, here we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+ # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+ # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+ # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for now we use a unique string created at
+ # compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -113,7 +158,7 @@ def filtering(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -121,7 +166,17 @@ def filtering(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ filter_criteria_list=filter_criteria_list,
+ filter_logical_operator=filter_logical_operator,
+ filter_columns_to_drop=filter_columns_to_drop,
)
+
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
ray_cluster = create_ray_op(
@@ -140,19 +195,7 @@ def filtering(
ray_name=ray_name,
run_id=dsl.RUN_ID_PLACEHOLDER,
additional_params=additional_params,
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "filter_criteria_list": filter_criteria_list,
- "filter_logical_operator": filter_logical_operator,
- "filter_columns_to_drop": filter_columns_to_drop,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
diff --git a/transforms/universal/filter/kfp_ray/v1/Makefile b/transforms/universal/filter/kfp_ray/v1/Makefile
deleted file mode 100644
index b7696b246..000000000
--- a/transforms/universal/filter/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=filter_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=filter_wf.py
diff --git a/transforms/universal/noop/Makefile b/transforms/universal/noop/Makefile
index 7735c3251..41413c041 100644
--- a/transforms/universal/noop/Makefile
+++ b/transforms/universal/noop/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/noop/kfp_ray/v1/Makefile b/transforms/universal/noop/kfp_ray/Makefile
similarity index 84%
rename from transforms/universal/noop/kfp_ray/v1/Makefile
rename to transforms/universal/noop/kfp_ray/Makefile
index 0a9f8a6d4..4f1d5ee7c 100644
--- a/transforms/universal/noop/kfp_ray/v1/Makefile
+++ b/transforms/universal/noop/kfp_ray/Makefile
@@ -1,19 +1,31 @@
-REPOROOT=${CURDIR}/../../../../../
+REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
-SRC_DIR=${CURDIR}/../../ray/
+SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
.PHONY: workflow-build
workflow-build: workflow-venv
- @for file in $(YAML_WF); do \
- $(MAKE) $$file; \
- done
+ $(MAKE) $(YAML_WF)
.PHONY: workflow-test
workflow-test: workflow-build
diff --git a/transforms/universal/noop/kfp_ray/v1/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py
similarity index 68%
rename from transforms/universal/noop/kfp_ray/v1/noop_multiple_wf.py
rename to transforms/universal/noop/kfp_ray/noop_multiple_wf.py
index cf374c8af..67b4aead0 100644
--- a/transforms/universal/noop/kfp_ray/v1/noop_multiple_wf.py
+++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py
@@ -9,15 +9,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6"
@@ -29,13 +27,60 @@
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ noop_sleep_sec: int,
+) -> dict:
+ import uuid
+
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "noop_sleep_sec": noop_sleep_sec,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which KFPv2 deprecates and exposes under a different import path.
+# KFPv2 recommends using the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, here we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+ # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+ # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+ # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for now we use a unique string created at
+ # compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -107,7 +152,7 @@ def noop(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -115,12 +160,19 @@ def noop(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ noop_sleep_sec=noop_sleep_sec,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
ray_head_options=ray_head_options,
ray_worker_options=ray_worker_options,
server_url=server_url,
@@ -131,20 +183,10 @@ def noop(
# Execute job
execute_job = execute_ray_jobs_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
additional_params=additional_params,
# note that the parameters below are specific for NOOP transform
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "noop_sleep_sec": noop_sleep_sec,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
@@ -152,8 +194,9 @@ def noop(
ComponentUtils.set_s3_env_vars_to_component(execute_job, data_s3_access_secret)
execute_job.after(ray_cluster)
+ # TODO
# Configure the pipeline level to one week (in seconds)
- dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
+ # dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
if __name__ == "__main__":
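As the comments above note, the two SDKs obtain their run identifier differently, which is why `run_id` is now resolved once at module level. A hedged sketch of the trade-off (the Argo expansion shown for the v1 placeholder is an assumption about KFPv1 internals):

```python
# Hedged sketch of the run_id switch used in all the workflow files: the
# KFPv2 path bakes a uuid into the compiled YAML, so two submissions of the
# same compiled pipeline share it; KFPv1 resolves its placeholder per run.
import os
import uuid

if os.getenv("KFPv2", "0") == "1":
    run_id = uuid.uuid4().hex  # fixed at compilation time
else:
    # assumption: dsl.RUN_ID_PLACEHOLDER expands to the Argo workflow uid,
    # i.e. something like "{{workflow.uid}}", substituted for every run
    run_id = "{{workflow.uid}}"
print(run_id)
```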
diff --git a/transforms/universal/noop/kfp_ray/v1/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py
similarity index 67%
rename from transforms/universal/noop/kfp_ray/v1/noop_wf.py
rename to transforms/universal/noop/kfp_ray/noop_wf.py
index d43b88189..8748a60ca 100644
--- a/transforms/universal/noop/kfp_ray/v1/noop_wf.py
+++ b/transforms/universal/noop/kfp_ray/noop_wf.py
@@ -9,15 +9,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6"
@@ -29,19 +27,64 @@
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
+component_spec_path = "../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ noop_sleep_sec: int,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "noop_sleep_sec": noop_sleep_sec,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which KFPv2 deprecates and exposes under a different import path.
+# KFPv2 recommends using the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, here we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+ # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+ # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+ # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for now we use a unique string created at
+ # compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print("WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
execute_ray_jobs_op = comp.load_component_from_file(component_spec_path + "executeRayJobComponent.yaml")
# clean up Ray
cleanup_ray_op = comp.load_component_from_file(component_spec_path + "deleteRayClusterComponent.yaml")
+
# Task name is part of the pipeline name, the ray cluster name and the job name in DMF.
TASK_NAME: str = "noop"
@@ -53,7 +96,7 @@
def noop(
# Ray cluster
ray_name: str = "noop-kfp-ray", # name of Ray cluster
- ray_head_options: str = '{"cpu": 1, "memory": 4, "image_pull_secret": "", ' '"image": "' + task_image + '"}',
+ ray_head_options: str = '{"cpu": 1, "memory": 4, "image_pull_secret": "", "image": "' + task_image + '" }',
ray_worker_options: str = '{"replicas": 2, "max_replicas": 2, "min_replicas": 2, "cpu": 2, "memory": 4, '
'"image_pull_secret": "", "image": "' + task_image + '"}',
server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
@@ -106,7 +149,7 @@ def noop(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -114,12 +157,19 @@ def noop(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ noop_sleep_sec=noop_sleep_sec,
)
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
ray_head_options=ray_head_options,
ray_worker_options=ray_worker_options,
server_url=server_url,
@@ -130,20 +180,10 @@ def noop(
# Execute job
execute_job = execute_ray_jobs_op(
ray_name=ray_name,
- run_id=dsl.RUN_ID_PLACEHOLDER,
+ run_id=run_id,
additional_params=additional_params,
# note that the parameters below are specific for NOOP transform
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "noop_sleep_sec": noop_sleep_sec,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
@@ -151,8 +191,9 @@ def noop(
ComponentUtils.set_s3_env_vars_to_component(execute_job, data_s3_access_secret)
execute_job.after(ray_cluster)
+ # TODO
# Configure the pipeline level to one week (in seconds)
- dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
+ # dsl.get_pipeline_conf().set_timeout(ONE_WEEK_SEC)
if __name__ == "__main__":
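`compute_exec_params_func` above delegates worker sizing to `KFPUtils.default_compute_execution_params`, which this diff does not show. A hedged sketch of what such a helper plausibly does, stated as an assumption rather than the repo's implementation: divide the usable cluster CPUs by the per-actor CPU request.

```python
# Assumption-labeled sketch; the real KFPUtils.default_compute_execution_params
# in workflow_support may compute this differently.
import json


def default_compute_execution_params(worker_options: str, actor_options: str) -> str:
    workers = json.loads(worker_options.replace("'", '"'))
    actor = json.loads(actor_options.replace("'", '"'))
    cluster_cpu = workers["replicas"] * workers["cpu"] * 0.85  # keep headroom for system actors
    n_workers = max(1, int(cluster_cpu / actor.get("num_cpus", 1)))
    return str(n_workers)


print(default_compute_execution_params(
    '{"replicas": 2, "max_replicas": 2, "min_replicas": 2, "cpu": 2, "memory": 4}',
    '{"num_cpus": 0.8}',
))  # -> 4
```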
diff --git a/transforms/universal/tokenization/Makefile b/transforms/universal/tokenization/Makefile
index 7735c3251..41413c041 100644
--- a/transforms/universal/tokenization/Makefile
+++ b/transforms/universal/tokenization/Makefile
@@ -47,20 +47,20 @@ load-image::
.PHONY: workflow-venv
workflow-venv:
- $(MAKE) -C kfp_ray/v1 workflow-venv
+ $(MAKE) -C kfp_ray workflow-venv
.PHONY: workflow-build
workflow-build:
- $(MAKE) -C kfp_ray/v1 workflow-build
+ $(MAKE) -C kfp_ray workflow-build
.PHONY: workflow-test
workflow-test:
- $(MAKE) -C kfp_ray/v1 workflow-test
+ $(MAKE) -C kfp_ray workflow-test
.PHONY: workflow-upload
workflow-upload:
- $(MAKE) -C kfp_ray/v1 workflow-upload
+ $(MAKE) -C kfp_ray workflow-upload
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
- $(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
+ $(MAKE) -C kfp_ray workflow-reconcile-requirements
diff --git a/transforms/universal/tokenization/kfp_ray/Makefile b/transforms/universal/tokenization/kfp_ray/Makefile
new file mode 100644
index 000000000..7d5aa6687
--- /dev/null
+++ b/transforms/universal/tokenization/kfp_ray/Makefile
@@ -0,0 +1,44 @@
+REPOROOT=${CURDIR}/../../../../
+WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
+include $(REPOROOT)/transforms/.make.workflows
+
+SRC_DIR=${CURDIR}/../ray/
+
+PYTHON_WF := $(shell find ./ -name '*_wf.py')
+YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})
+
+workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
+
+venv::
+
+build::
+
+test::
+
+test-src::
+
+test-image::
+
+image::
+
+load-image::
+
+.PHONY: workflow-build
+workflow-build: workflow-venv
+ $(MAKE) $(YAML_WF)
+
+.PHONY: workflow-test
+workflow-test: workflow-build
+ $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=tokenization_wf.yaml
+
+.PHONY: workflow-upload
+workflow-upload: workflow-build
+ @for file in $(YAML_WF); do \
+ $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
+ done
+
+.PHONY: workflow-reconcile-requirements
+workflow-reconcile-requirements:
+ @for file in $(PYTHON_WF); do \
+ $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
+ done
diff --git a/transforms/universal/tokenization/kfp_ray/v1/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py
similarity index 66%
rename from transforms/universal/tokenization/kfp_ray/v1/tokenization_wf.py
rename to transforms/universal/tokenization/kfp_ray/tokenization_wf.py
index 2ff84bdfd..f74d0a331 100644
--- a/transforms/universal/tokenization/kfp_ray/v1/tokenization_wf.py
+++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py
@@ -9,32 +9,117 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
+import os
+
+from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils
import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
-from kfp_support.workflow_support.utils import (
- ONE_HOUR_SEC,
- ONE_WEEK_SEC,
- ComponentUtils,
-)
-
# the name of the job script
EXEC_SCRIPT_NAME: str = "tokenization_transform_ray.py"
-task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.3.0"
+task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.4.0.dev6"
# components
-base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0"
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6"
# path to kfp component specifications files
+component_spec_path = "../../../../kfp/kfp_ray_components/"
+
-component_spec_path = "../../../../../kfp/kfp_ray_components/"
# compute execution parameters. Here different tranforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
-compute_exec_params_op = comp.func_to_container_op(
- func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
-)
+def compute_exec_params_func(
+ worker_options: str,
+ actor_options: str,
+ data_s3_config: str,
+ data_max_files: int,
+ data_num_samples: int,
+ runtime_pipeline_id: str,
+ runtime_job_id: str,
+ runtime_code_location: str,
+ tkn_tokenizer: str,
+ tkn_tokenizer_args: str,
+ tkn_doc_id_column: str,
+ tkn_doc_content_column: str,
+ tkn_text_lang: str,
+ tkn_chunk_size: int,
+) -> dict:
+ from workflow_support.runtime_utils import KFPUtils
+
+ return {
+ "data_s3_config": data_s3_config,
+ "data_max_files": data_max_files,
+ "data_num_samples": data_num_samples,
+ "runtime_num_workers": KFPUtils.default_compute_execution_params(worker_options, actor_options),
+ "runtime_worker_options": actor_options,
+ "runtime_pipeline_id": runtime_pipeline_id,
+ "runtime_job_id": runtime_job_id,
+ "runtime_code_location": runtime_code_location,
+ "tkn_tokenizer": tkn_tokenizer,
+ "tkn_tokenizer_args": tkn_tokenizer_args,
+ "tkn_doc_id_column": tkn_doc_id_column,
+ "tkn_doc_content_column": tkn_doc_content_column,
+ "tkn_text_lang": tkn_text_lang,
+ "tkn_chunk_size": tkn_chunk_size,
+ }
+
+
+# KFPv1 and KFPv2 use different methods to create a component from a function. KFPv1 uses the
+# `create_component_from_func` function, which KFPv2 deprecates and exposes under a different import path.
+# KFPv2 recommends using the `@dsl.component` decorator, which doesn't exist in KFPv1. Therefore, here we use
+# this if/else statement and call the decorator explicitly.
+if os.getenv("KFPv2", "0") == "1":
+ # In KFPv2, dsl.RUN_ID_PLACEHOLDER is deprecated and cannot be used since SDK 2.5.0. On the other hand, we cannot
+ # create a unique string in a component (at runtime) and pass it to the `clean_up_task` of the `ExitHandler`, due to
+ # https://github.com/kubeflow/pipelines/issues/10187. Therefore, for now we use a unique string created at
+ # compilation time.
+ import uuid
+
+ compute_exec_params_op = dsl.component_decorator.component(
+ func=compute_exec_params_func, base_image=base_kfp_image
+ )
+ print(
+ "WARNING: the ray cluster name can be non-unique at runtime, please do not execute simultaneous Runs of the " +
+ "same version of the same pipeline !!!")
+ run_id = uuid.uuid4().hex
+else:
+ compute_exec_params_op = comp.create_component_from_func(func=compute_exec_params_func, base_image=base_kfp_image)
+ run_id = dsl.RUN_ID_PLACEHOLDER
+
# create Ray cluster
create_ray_op = comp.load_component_from_file(component_spec_path + "createRayClusterComponent.yaml")
# execute job
@@ -116,7 +201,7 @@ def tokenization(
:return: None
"""
# create clean_up task
- clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=dsl.RUN_ID_PLACEHOLDER, server_url=server_url)
+ clean_up_task = cleanup_ray_op(ray_name=ray_name, run_id=run_id, server_url=server_url)
ComponentUtils.add_settings_to_component(clean_up_task, 60)
# pipeline definition
with dsl.ExitHandler(clean_up_task):
@@ -124,10 +209,21 @@ def tokenization(
compute_exec_params = compute_exec_params_op(
worker_options=ray_worker_options,
actor_options=runtime_actor_options,
+ data_s3_config=data_s3_config,
+ data_max_files=data_max_files,
+ data_num_samples=data_num_samples,
+ runtime_pipeline_id=runtime_pipeline_id,
+ runtime_job_id=run_id,
+ runtime_code_location=runtime_code_location,
+ tkn_tokenizer=tkn_tokenizer,
+ tkn_tokenizer_args=tkn_tokenizer_args,
+ tkn_doc_id_column=tkn_doc_id_column,
+ tkn_doc_content_column=tkn_doc_content_column,
+ tkn_text_lang=tkn_text_lang,
+ tkn_chunk_size=tkn_chunk_size,
)
+
ComponentUtils.add_settings_to_component(compute_exec_params, ONE_HOUR_SEC * 2)
- ComponentUtils.set_s3_env_vars_to_component(compute_exec_params, data_s3_access_secret)
-
# start Ray cluster
ray_cluster = create_ray_op(
ray_name=ray_name,
@@ -144,22 +240,7 @@ def tokenization(
ray_name=ray_name,
run_id=dsl.RUN_ID_PLACEHOLDER,
additional_params=additional_params,
- exec_params={
- "data_s3_config": data_s3_config,
- "data_max_files": data_max_files,
- "data_num_samples": data_num_samples,
- "runtime_num_workers": compute_exec_params.output,
- "runtime_worker_options": runtime_actor_options,
- "runtime_pipeline_id": runtime_pipeline_id,
- "runtime_job_id": dsl.RUN_ID_PLACEHOLDER,
- "runtime_code_location": runtime_code_location,
- "tkn_tokenizer": tkn_tokenizer,
- "tkn_tokenizer_args": tkn_tokenizer_args,
- "tkn_doc_id_column": tkn_doc_id_column,
- "tkn_doc_content_column": tkn_doc_content_column,
- "tkn_text_lang": tkn_text_lang,
- "tkn_chunk_size": tkn_chunk_size,
- },
+ exec_params=compute_exec_params.output,
exec_script_name=EXEC_SCRIPT_NAME,
server_url=server_url,
)
diff --git a/transforms/universal/tokenization/kfp_ray/v1/Makefile b/transforms/universal/tokenization/kfp_ray/v1/Makefile
deleted file mode 100644
index 5814e2935..000000000
--- a/transforms/universal/tokenization/kfp_ray/v1/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-REPOROOT=${CURDIR}/../../../../../
-WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
-include $(REPOROOT)/transforms/.make.workflows
-
-SRC_DIR=${CURDIR}/../../ray/
-
-YAML_FILE=tokenization_wf.yaml
-
-workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}
-
-.PHONY: workflow-build
-workflow-build: workflow-venv
- $(MAKE) ${YAML_FILE}
-
-.PHONY: workflow-test
-workflow-test: workflow-build
- $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-upload
-workflow-upload: workflow-build
- $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=${YAML_FILE}
-
-.PHONY: workflow-reconcile-requirements
-workflow-reconcile-requirements:
- $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=tokenization_wf.py
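With the `v1` subdirectories removed, each transform Makefile now delegates straight to `kfp_ray`. A sketch of driving the relocated targets from the repository root, limited to the transforms touched in this diff:

```python
# Sketch: invoke the relocated workflow-build target for the transforms
# touched by this diff (run from the repository root; the KFPv2 environment
# variable checked by the workflow sources selects the SDK path at compile time).
import subprocess

for transform in ("fdedup", "filter", "noop", "tokenization"):
    subprocess.run(
        ["make", "-C", f"transforms/universal/{transform}/kfp_ray", "workflow-build"],
        check=True,
    )
```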