Skip to content

Commit

Permalink
Merge pull request #236 from revit13/fix-tags
Browse files Browse the repository at this point in the history
Fix tasks tags in kfp workflows.
  • Loading branch information
revit13 authored Jun 13, 2024
2 parents 87e37af + 286ce59 commit abab08c
Show file tree
Hide file tree
Showing 29 changed files with 84 additions and 43 deletions.
33 changes: 22 additions & 11 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,37 @@ DPK_LIB_KFP_SHARED=0.2.0$(RELEASE_VERSION_SUFFIX)

# Begin transform versions/tags
BLOCKLIST_VERSION=0.4.2$(RELEASE_VERSION_SUFFIX)
DOC_ID_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
DOC_ID_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
EDEDUP_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FDEDUP_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

DOC_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
DOC_ID_SPARK_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

EDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

FDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

FILTER_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)

NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX)
NOOP_RAY_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX)
NOOP_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)

RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

LANG_ID_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
LANG_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
TOKENIZATION_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
MALWARE_VERSION=0.5.0$(RELEASE_VERSION_SUFFIX)
PROGLANG_SELECT_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
CODE_QUALITY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
DOC_QUALITY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
INGEST_TO_PARQUET_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

TOKENIZATION_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
TOKENIZATION_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

MALWARE_RAY_VERSION=0.5.0$(RELEASE_VERSION_SUFFIX)

PROGLANG_SELECT_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

CODE_QUALITY_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

INGEST_TO_PARQUET_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
INGEST_TO_PARQUET_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

KFP_DOCKER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
2 changes: 1 addition & 1 deletion data-processing-lib/doc/spark-runtime.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The Spark runtime extends the base framework with the following set of component
## Transforms

* [AbstractSparkTransform](../spark/src/data_processing_spark/runtime/spark/spark_transform.py) - this
is the base class for all spark-based tranforms over spark DataFrames.
is the base class for all spark-based transforms over spark DataFrames.
* [SparkTransformConfiguration](../spark/src/data_processing_spark/runtime/spark/spark_transform_config.py) - this
is simple extension of the base TransformConfiguration class to hold the transformation class
(an extension of AbstractSpartTransform).
Expand Down
2 changes: 1 addition & 1 deletion kfp/doc/simple_transform_pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Ray cluster. For each step we have to define a component that will execute them:
```python
# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.0.2"
# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
compute_exec_params_op = comp.func_to_container_op(
func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
Expand Down
2 changes: 1 addition & 1 deletion kfp/pipeline_generator/pipeline.ptmpl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ base_kfp_image = "__kfp_base_image__"
# path to kfp component specifications files
component_spec_path = "__component_spec_path__"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
compute_exec_params_op = comp.func_to_container_op(
func=__compute_func_name__, base_image=base_kfp_image
Expand Down
7 changes: 5 additions & 2 deletions transforms/.make.workflows
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
include ${REPOROOT}/.make.versions
IGNORE := $(shell bash -c "sed -nr /^[^\#]*=/p ${REPOROOT}/.make.versions | sed 's/=/:=/' | sed 's/^/export /' > makeenv")

include makeenv
include ${REPOROOT}/kfp/requirements.env

# Include the common rules.
Expand All @@ -14,7 +16,8 @@ endef
# FIXME
.PHONY: .transforms_workflows.reconcile-requirements
.transforms_workflows.reconcile-requirements:

cd ${REPOROOT}/kfp/kfp_ray_components && $(MAKE) reconcile-requirements
${REPOROOT}/transforms/hack/update_workflow_tags.sh ${REPOROOT}/.make.versions ${PIPELINE_FILE}

.PHONY: .transforms_workflows.compile-pipeline
.transforms_workflows.compile-pipeline:
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/kfp_ray/code_quality_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=code_quality
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${CODE_QUALITY_VERSION}
DOCKER_IMAGE_VERSION=${CODE_QUALITY_RAY_VERSION}

# Use default rule inherited from makefile.common
clean:: .transforms.clean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
compute_exec_params_op = comp.func_to_container_op(
func=ComponentUtils.default_compute_execution_params, base_image=base_kfp_image
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/ingest_2_parquet/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=ingest_2_parquet
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${INGEST_TO_PARQUET_VERSION}
DOCKER_IMAGE_VERSION=${INGEST_TO_PARQUET_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/malware/kfp_ray/malware_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/malware/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=malware
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${MALWARE_VERSION}
DOCKER_IMAGE_VERSION=${MALWARE_RAY_VERSION}

OS := $(shell uname -s)
ifeq ($(OS),Darwin)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
component_spec_path = "../../../../kfp/kfp_ray_components/"


# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=proglang_select
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${PROGLANG_SELECT_VERSION}
DOCKER_IMAGE_VERSION=${PROGLANG_SELECT_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
26 changes: 26 additions & 0 deletions transforms/hack/update_workflow_tags.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -euo pipefail

if [[ $# != 2 ]]; then
cat << EOF
"Incorrect number of parameters provided. The required parameters are versions_file and pipeline_path.
EOF
exit 1
fi

versions_file=$1
pipeline_path=$2

# Modify the tasks tags as defined in the versions file
while IFS= read -r line; do
[ -z "$line" ] && continue
[[ $line == *#* ]] && continue
VERSION_NAME=$(echo $line |cut -d "=" -f 1)
DOCKER_IMAGE_NAME=$(echo $line |cut -d "=" -f 1 |sed "s/_VERSION//" |tr '[:upper:]' '[:lower:]')
DOCKER_IMAGE_NAME=$(echo $DOCKER_IMAGE_NAME |sed "s/_ray$/\-ray/" | sed "s/_spark$/\-spark/" | sed "s/_parquet$/\-parquet/")
DOCKER_IMAGE_VERSION=$(eval echo ${!VERSION_NAME})
sed -i.back "s/data-prep-kit\/$DOCKER_IMAGE_NAME:.*/data-prep-kit\/$DOCKER_IMAGE_NAME:$DOCKER_IMAGE_VERSION\"/" $pipeline_path
done < $versions_file
# Update kfp component image tag
sed -i.back "s/kfp-data-processing:.*/kfp-data-processing:$KFP_DOCKER_VERSION\"/" $pipeline_path
10 changes: 5 additions & 5 deletions transforms/language/lang_id/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,20 @@ load-image::

.PHONY: workflow-venv
workflow-venv:
$(MAKE) -C kfp_ray/v1 workflow-venv
$(MAKE) -C kfp_ray workflow-venv

.PHONY: workflow-build
workflow-build:
$(MAKE) -C kfp_ray/v1 workflow-build
$(MAKE) -C kfp_ray workflow-build

.PHONY: workflow-test
workflow-test:
$(MAKE) -C kfp_ray/v1 workflow-test
$(MAKE) -C kfp_ray workflow-test

.PHONY: workflow-upload
workflow-upload:
$(MAKE) -C kfp_ray/v1 workflow-upload
$(MAKE) -C kfp_ray workflow-upload

.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
$(MAKE) -C kfp_ray/v1 workflow-reconcile-requirements
$(MAKE) -C kfp_ray workflow-reconcile-requirements
4 changes: 1 addition & 3 deletions transforms/language/lang_id/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ set-versions: workflow-reconcile-requirements

.PHONY: workflow-build
workflow-build: workflow-venv
@for file in $(YAML_WF); do \
$(MAKE) $$file; \
done
$(MAKE) $(YAML_WF)

.PHONY: workflow-test
workflow-test: workflow-build
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/doc_id/kfp_ray/doc_id_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/doc_id/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=doc_id
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${DOC_ID_VERSION}
DOCKER_IMAGE_VERSION=${DOC_ID_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
1 change: 1 addition & 0 deletions transforms/universal/ededup/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ workflow-upload:
$(MAKE) -C kfp_ray workflow-upload

.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
$(MAKE) -C kfp_ray workflow-reconcile-requirements
2 changes: 1 addition & 1 deletion transforms/universal/ededup/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=ededup
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${EDEDUP_VERSION}
DOCKER_IMAGE_VERSION=${EDEDUP_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
2 changes: 2 additions & 0 deletions transforms/universal/fdedup/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ image::

test-image::

load-image::

set-versions: workflow-reconcile-requirements

.PHONY: workflow-build
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/fdedup/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=fdedup
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${FDEDUP_VERSION}
DOCKER_IMAGE_VERSION=${FDEDUP_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/filter/kfp_ray/filter_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/filter/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=filter
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${FILTER_VERSION}
DOCKER_IMAGE_VERSION=${FILTER_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/noop/kfp_ray/noop_multiple_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/noop/kfp_ray/noop_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"

# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
component_spec_path = "../../../../kfp/kfp_ray_components/"


# compute execution parameters. Here different tranforms might need different implementations. As
# compute execution parameters. Here different transforms might need different implementations. As
# a result, instead of creating a component we are creating it in place here.
def compute_exec_params_func(
worker_options: str,
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/tokenization/python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=tokenization
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${TOKENIZATION_VERSION}
DOCKER_IMAGE_VERSION=${TOKENIZATION_PYTHON_VERSION}

venv:: .transforms.python-venv

Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/tokenization/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=tokenization
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${TOKENIZATION_VERSION}
DOCKER_IMAGE_VERSION=${TOKENIZATION_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down

0 comments on commit abab08c

Please sign in to comment.