diff --git a/.make.versions b/.make.versions index 54e6d8ca1..662782452 100644 --- a/.make.versions +++ b/.make.versions @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2 DPK_MICRO_VERSION=1 # The suffix is generally always set in the main/development branch and only nulled out when creating release branches. # It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi. -DPK_VERSION_SUFFIX=.dev3 +DPK_VERSION_SUFFIX= DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) diff --git a/data-processing-lib/python/pyproject.toml b/data-processing-lib/python/pyproject.toml index 9ff6c2d7f..d95cb3075 100644 --- a/data-processing-lib/python/pyproject.toml +++ b/data-processing-lib/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Library" @@ -41,7 +41,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/data_processing"] diff --git a/data-processing-lib/ray/pyproject.toml b/data-processing-lib/ray/pyproject.toml index 3f347cdf4..c67ad8a9c 100644 --- a/data-processing-lib/ray/pyproject.toml +++ b/data-processing-lib/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_ray" -version = "0.2.1.dev3" +version = "0.2.1" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] requires-python = ">=3.10" description = "Data Preparation Toolkit Library for Ray" @@ -11,7 +11,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit>=0.2.1.dev3", + "data-prep-toolkit>=0.2.1", "ray[default]==2.24.0", # These two are to fix security issues identified by quay.io 
"fastapi>=0.110.2", @@ -42,7 +42,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/data_processing_ray"] diff --git a/data-processing-lib/spark/Makefile b/data-processing-lib/spark/Makefile index d4d01ba74..51290ccf5 100644 --- a/data-processing-lib/spark/Makefile +++ b/data-processing-lib/spark/Makefile @@ -4,7 +4,6 @@ include $(REPOROOT)/.make.defaults SPARK_VERSION=3.5.2 DOCKER_IMAGE_NAME=data-prep-kit-spark-$(SPARK_VERSION) DOCKER_IMAGE_LIB_NAME=data-prep-kit-spark -DOCKER_IMAGE_VERSION := latest .check-env:: diff --git a/data-processing-lib/spark/pyproject.toml b/data-processing-lib/spark/pyproject.toml index b6e9edddb..7514f0a50 100644 --- a/data-processing-lib/spark/pyproject.toml +++ b/data-processing-lib/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_spark" -version = "0.2.1.dev3" +version = "0.2.1" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] requires-python = ">=3.10" description = "Data Preparation Toolkit Library for Spark" @@ -11,7 +11,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "pyspark>=3.5.2", "psutil>=6.0.0" ] diff --git a/kfp/kfp_ray_components/createRayClusterComponent.yaml b/kfp/kfp_ray_components/createRayClusterComponent.yaml index 30b0b66d8..36e88d978 100644 --- a/kfp/kfp_ray_components/createRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/createRayClusterComponent.yaml @@ -11,7 +11,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. 
# Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml index 44e199c47..3c3e79b9c 100644 --- a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml @@ -9,7 +9,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeRayJobComponent.yaml b/kfp/kfp_ray_components/executeRayJobComponent.yaml index 7ab517bff..4dccd78d0 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent.yaml @@ -12,7 +12,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml index 9b98912f0..0b1d4ecef 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml @@ -13,7 +13,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). 
# The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml index 6b261a003..335713462 100644 --- a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml +++ b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml @@ -27,7 +27,7 @@ outputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists, and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml index eaea5fb0d..a4291b093 100644 --- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v1" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. 
KFP support" license = {text = "Apache-2.0"} @@ -13,7 +13,7 @@ authors = [ ] dependencies = [ "kfp==1.8.22", - "data-prep-toolkit-kfp-shared==0.2.1.dev3", + "data-prep-toolkit-kfp-shared==0.2.1", ] [build-system] diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml index c5ca32f1a..866ca157d 100644 --- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v2" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "kfp==2.8.0", "kfp-kubernetes==1.2.0", - "data-prep-toolkit-kfp-shared==0.2.1.dev3", + "data-prep-toolkit-kfp-shared==0.2.1", ] [build-system] diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml index b4f509433..a0fa9e760 100644 --- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_shared" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. 
KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "requests", "kubernetes", - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py index 867c83198..a35a9fbf6 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py @@ -17,14 +17,14 @@ run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_tokenization_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -code_to_parquet_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:latest" -proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest" -code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest" -malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest" -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" -tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest" +code_to_parquet_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.1" +proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.1" +code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.1" +malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.1" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" +tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.1" # Pipeline to invoke execution on remote resource 
diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py index 8243a65b5..947aece29 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py @@ -12,9 +12,9 @@ run_exact_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" # Pipeline to invoke execution on remote resource @dsl.pipeline( diff --git a/release-notes.md b/release-notes.md index 52aca0930..8d114b8ff 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,36 @@ # Data Prep Kit Release notes +## Release 0.2.1 - 9/24/2024 + +### General +1. Bug fixes across the repo +1. Added AI Alliance RAG demo, tutorials and notebooks and tips for running on google colab +1. Added new transforms and single package for transforms published to pypi +1. Improved CI/CD with targeted workflow triggered on specific changes to specific modules +1. New enhancements for cutting a release + + +### data-prep-toolkit libraries (python, ray, spark) + +1. Restructure the repository to distinguish/separate runtime libraries +1. Split data-processing-lib/ray into python and ray +1. Spark runtime +1. Updated pyarrow version +1. Define required transform() method as abstract to AbstractTableTransform +1. 
Enables configuration of makefile to use src or pypi for data-prep-kit library dependencies + + +### KFP Workloads + +1. Add a configurable timeout before destroying the deployed Ray cluster. + +### Transforms + +1. Added 7 new transforms including: language identification, profiler, repo level ordering, doc quality, pdf2parquet, HTML2Parquet and PII Transform +1. Added ededup python implementation and incremental ededup +1. Added fuzzy floating point comparison + + ## Release 0.2.0 - 6/27/2024 ### General diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py index a2080e70a..68fe95384 100644 --- a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py +++ b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py @@ -21,11 +21,11 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "code2parquet_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index 79f0988be..c849a0de5 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "code2parquet Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "parameterized", 
"pandas", ] @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index c7f1a1563..21c34ec32 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", - "dpk-code2parquet-transform-python==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", + "dpk-code2parquet-transform-python==0.2.1", "parameterized", "pandas", ] @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py index 138b5d613..cc2b424e1 100644 --- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py +++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "code_quality_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index 88c8f9031..58c0ed6eb 100644 --- 
a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "bs4==0.0.2", "transformers==4.38.2", ] @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 6925f45c0..e62dfed02 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Code Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-code-quality-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-code-quality-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py index ba82169c3..426e5fb91 100644 --- a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py +++ b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "header_cleanser_transform_ray.py" PREFIX: str = "" -task_image = 
"quay.io/dataprep1/data-prep-kit/header_cleanser-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml index 2799974b4..14b609854 100644 --- a/transforms/code/header_cleanser/python/pyproject.toml +++ b/transforms/code/header_cleanser/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "License and Copyright Removal Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "scancode-toolkit==32.1.0", ] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml index d40aa9373..8c05f2099 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "License and copyright removal Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" }, ] dependencies = [ - "dpk-header-cleanser-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + 
"dpk-header-cleanser-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", "scancode-toolkit==32.1.0", ] @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py index d9ec70b37..5d7b4e5c5 100644 --- a/transforms/code/malware/kfp_ray/malware_wf.py +++ b/transforms/code/malware/kfp_ray/malware_wf.py @@ -21,10 +21,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "malware_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 9e5e122ca..707c3a106 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Malware Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "clamd==1.0.2", ] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index 60d9a3089..8e221d732 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ 
-1,6 +1,6 @@ [project] name = "dpk_malware_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Malware Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "dpk-malware-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-malware-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py index 209121cd4..f9f2c4a62 100644 --- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py +++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py @@ -21,10 +21,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "proglang_select_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 7fcef9bfc..da05b8acf 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Programming Language Selection Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = 
"Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] @@ -30,7 +30,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 703bf5279..ae778f40d 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-proglang-select-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-proglang-select-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py index 256636176..c91963bec 100644 --- a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py +++ b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "repo_level_order_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml 
b/transforms/code/repo_level_ordering/ray/pyproject.toml index 6f54a65ed..7cc8bdb41 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_repo_level_order_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "repo_level_order Ray Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Shanmukha Guttula", email = "shagutt1@in.ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "networkx==3.3", "colorlog==6.8.2", "func-timeout==4.3.5", @@ -37,7 +37,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py index a613955c9..eb76a8ec6 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py index 7fb107758..313476604 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py 
@@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/python/pyproject.toml b/transforms/language/doc_chunk/python/pyproject.toml index 4deb09d47..ddcadba5b 100644 --- a/transforms/language/doc_chunk/python/pyproject.toml +++ b/transforms/language/doc_chunk/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_chunk_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "chunk documents Python Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "docling-core==1.3.0", "llama-index-core>=0.11.0,<0.12.0", ] @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml index 19288e2db..76e368c27 100644 --- a/transforms/language/doc_chunk/ray/pyproject.toml +++ b/transforms/language/doc_chunk/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_chunk_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "chunk documents Ray Transform" license = {text = "Apache-2.0"} @@ -11,8 +11,8 @@ authors = [ { name = "Christoph Auer", email = 
"cau@zurich.ibm.com" }, ] dependencies = [ - "dpk-doc-chunk-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-doc-chunk-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py index c68715b5d..75ebde0d1 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py index b42262468..e334e0d13 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = 
"quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml index e63a6d5e5..c8d9fa11c 100644 --- a/transforms/language/doc_quality/python/pyproject.toml +++ b/transforms/language/doc_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Document Quality Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index 6bc9cc6c6..4d642384e 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Document Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-doc_quality-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3" + "dpk-doc_quality-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1" ] [build-system] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git 
a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py index ecd58b6fe..3696cead6 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py index 4f581cf2c..01e041928 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index f2dd72919..240dbdd49 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ 
-1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "fasttext==0.9.2", "langcodes==3.3.0", "huggingface-hub >= 0.21.4, <1.0.0", @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index 4833913a4..be96eea96 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-lang_id-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-lang_id-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py index fdfbc1fe4..824facde4 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:latest" +task_image = 
"quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py index c3bf399fe..a3bf50759 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/python/pyproject.toml b/transforms/language/pdf2parquet/python/pyproject.toml index 24f2294b5..bc8f29c52 100644 --- a/transforms/language/pdf2parquet/python/pyproject.toml +++ b/transforms/language/pdf2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pdf2parquet_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PDF2PARQUET Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", 
"docling-core==1.2.0", "docling-ibm-models==1.1.7", "deepsearch-glm==0.21.0", @@ -36,7 +36,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pdf2parquet/ray/pyproject.toml b/transforms/language/pdf2parquet/ray/pyproject.toml index 950e5ce3d..57deeedc1 100644 --- a/transforms/language/pdf2parquet/ray/pyproject.toml +++ b/transforms/language/pdf2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pdf2parquet_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PDF2PARQUET Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "dpk-pdf2parquet-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-pdf2parquet-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py b/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py index f1c4dac98..381865cf3 100644 --- a/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py +++ b/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "pii_redactor_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml index a61987a45..7931e1ece 100644 --- a/transforms/language/pii_redactor/python/pyproject.toml +++ 
b/transforms/language/pii_redactor/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PII redactor Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Sowmya.L.R", email = "lrsowmya@gmail.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index a1b01be94..a6900b23d 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PII Redactor Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk_pii_redactor_transform_python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk_pii_redactor_transform_python==0.2.1", + "data-prep-toolkit-ray==0.2.1", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", @@ -36,7 +36,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py index 120c53c99..1a4eb864f 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py @@ -17,13 +17,13 @@ from 
workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py index d402c8832..0a3627b86 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml index 1ed8725ab..615cff94b 100644 --- a/transforms/language/text_encoder/python/pyproject.toml +++ b/transforms/language/text_encoder/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Text Encoder Python Transform" license = {text = "Apache-2.0"} @@ 
-11,7 +11,7 @@ authors = [ { name = "Peter Staar", email = "taa@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "sentence-transformers==3.0.1", ] @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml index aa8af8b44..cd9580692 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Text Encoder Ray Transform" license = {text = "Apache-2.0"} @@ -11,8 +11,8 @@ authors = [ { name = "Peter Staar", email = "taa@zurich.ibm.com" }, ] dependencies = [ - "dpk-text_encoder-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-text_encoder-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/packaging/python/pyproject.toml b/transforms/packaging/python/pyproject.toml index 5ddb40aae..fdea22080 100644 --- a/transforms/packaging/python/pyproject.toml +++ b/transforms/packaging/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_transforms" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms" diff --git a/transforms/packaging/python/requirements.txt b/transforms/packaging/python/requirements.txt index 6dec1e2de..e4ad2694a 100644 --- a/transforms/packaging/python/requirements.txt +++ 
b/transforms/packaging/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.1.dev3 +data-prep-toolkit>=0.2.1 bs4==0.0.2 #pdf2parquet # conflict with chunking.... diff --git a/transforms/packaging/ray/pyproject.toml b/transforms/packaging/ray/pyproject.toml index 9c1509472..971bb5ef9 100644 --- a/transforms/packaging/ray/pyproject.toml +++ b/transforms/packaging/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_transforms_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms using Ray" diff --git a/transforms/packaging/ray/requirements.txt b/transforms/packaging/ray/requirements.txt index 2e75ae185..fed96622b 100644 --- a/transforms/packaging/ray/requirements.txt +++ b/transforms/packaging/ray/requirements.txt @@ -1,5 +1,5 @@ -data-prep-toolkit-ray>=0.2.1.dev3 -data-prep-toolkit-transforms>=0.2.1.dev3 +data-prep-toolkit-ray>=0.2.1 +data-prep-toolkit-transforms>=0.2.1 scancode-toolkit==32.1.0 ; platform_system != 'Darwin' parameterized tqdm==4.66.3 diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py index 1eb96af25..5d381ab47 100644 --- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py +++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py @@ -17,12 +17,12 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files 
component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/doc_id/python/pyproject.toml b/transforms/universal/doc_id/python/pyproject.toml index 8e4358b28..1e822549b 100644 --- a/transforms/universal/doc_id/python/pyproject.toml +++ b/transforms/universal/doc_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3" + "data-prep-toolkit==0.2.1" ] [build-system] diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml index e5cb79d95..c8d495d0e 100644 --- a/transforms/universal/doc_id/ray/pyproject.toml +++ b/transforms/universal/doc_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "docid Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk_doc_id_transform_python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3" + "dpk_doc_id_transform_python==0.2.1", + "data-prep-toolkit-ray==0.2.1" ] [build-system] diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index 2b529de8d..6dd3cb63d 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/doc_id/spark/pyproject.toml b/transforms/universal/doc_id/spark/pyproject.toml index 
13d7bc2c3..f15c84305 100644 --- a/transforms/universal/doc_id/spark/pyproject.toml +++ b/transforms/universal/doc_id/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Doc ID Spark Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-spark==0.2.1.dev3", + "data-prep-toolkit-spark==0.2.1", ] [build-system] diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index 306391d6c..ed80a2084 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/ededup/python/pyproject.toml b/transforms/universal/ededup/python/pyproject.toml index e380bf58e..119caaa76 100644 --- a/transforms/universal/ededup/python/pyproject.toml +++ b/transforms/universal/ededup/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - 
"data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", ] diff --git a/transforms/universal/ededup/ray/pyproject.toml b/transforms/universal/ededup/ray/pyproject.toml index 2fdf82392..0b444de24 100644 --- a/transforms/universal/ededup/ray/pyproject.toml +++ b/transforms/universal/ededup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", - "dpk_ededup_transform_python==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", + "dpk_ededup_transform_python==0.2.1", "tqdm==4.66.3", ] diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py index c98ffafa3..2937d2bdb 100644 --- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py +++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "fdedup_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml index 70f92a23f..95ce1a883 100644 --- a/transforms/universal/fdedup/ray/pyproject.toml +++ b/transforms/universal/fdedup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_fdedup_transform_ray" -version = 
"0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "fdedup Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", "tqdm==4.66.3", diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py index b998cd7b5..ac87d1d7c 100644 --- a/transforms/universal/filter/kfp_ray/filter_wf.py +++ b/transforms/universal/filter/kfp_ray/filter_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "filter_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index 995247f4f..117ba6023 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Filter Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "duckdb==0.10.1", ] diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index fc0035475..1dd1efeee 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ 
b/transforms/universal/filter/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Filter Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "dpk-filter-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-filter-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index 92eebbee8..75bd07aad 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/filter/spark/pyproject.toml b/transforms/universal/filter/spark/pyproject.toml index 4d31c2ef2..f82807e9a 100644 --- a/transforms/universal/filter/spark/pyproject.toml +++ b/transforms/universal/filter/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Filter Spark Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-spark==0.2.1.dev3", + "data-prep-toolkit-spark==0.2.1", ] [project.optional-dependencies] diff --git a/transforms/universal/html2parquet/python/pyproject.toml b/transforms/universal/html2parquet/python/pyproject.toml index f49c498d6..de6b606f4 100644 --- a/transforms/universal/html2parquet/python/pyproject.toml +++ b/transforms/universal/html2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_html2parquet_transform_python" -version = 
"0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "HTML2PARQUET Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Syed Zawad", email = "szawad@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "trafilatura==1.12.0" ] diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py index a1f6592a8..2781153c1 100644 --- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py index 67405f134..ebeff5f33 100644 --- a/transforms/universal/noop/kfp_ray/noop_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = 
"../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index 5714e70de..8f2ec097e 100644 --- a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 9f1353b4e..e6d9cbddb 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-noop-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index e72cb06ae..47c7134b3 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index 965770d92..8ee6b7f54 100644 --- 
a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Spark Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.1.dev3", - "data-prep-toolkit-spark==0.2.1.dev3", + "dpk-noop-transform-python==0.2.1", + "data-prep-toolkit-spark==0.2.1", ] [build-system] diff --git a/transforms/universal/profiler/kfp_ray/profiler_wf.py b/transforms/universal/profiler/kfp_ray/profiler_wf.py index 7f21fa3e0..3d14a59a8 100644 --- a/transforms/universal/profiler/kfp_ray/profiler_wf.py +++ b/transforms/universal/profiler/kfp_ray/profiler_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml index 1473b88b4..555b74b2a 100644 --- a/transforms/universal/profiler/ray/pyproject.toml +++ b/transforms/universal/profiler/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_profiler_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "profiler Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = 
"blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", "tqdm==4.66.3", diff --git a/transforms/universal/resize/kfp_ray/resize_wf.py b/transforms/universal/resize/kfp_ray/resize_wf.py index f9b325674..4b95ff774 100644 --- a/transforms/universal/resize/kfp_ray/resize_wf.py +++ b/transforms/universal/resize/kfp_ray/resize_wf.py @@ -17,12 +17,12 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/resize-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/resize-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "resize_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/resize/python/pyproject.toml b/transforms/universal/resize/python/pyproject.toml index b1cc13314..b1abb776a 100644 --- a/transforms/universal/resize/python/pyproject.toml +++ b/transforms/universal/resize/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "resize Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml index 86834c1b1..9a9f2afc0 100644 --- a/transforms/universal/resize/ray/pyproject.toml +++ b/transforms/universal/resize/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_ray" -version = 
"0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Resize Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-resize-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-resize-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py index ba96a790a..8cceb9b0e 100644 --- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py +++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py @@ -20,10 +20,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "tokenization_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files # path to kfp component specifications files diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index 1dc0ca104..68bd2d9bc 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_python" keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - 
"data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "transformers==4.38.2", ] diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index fd259a9b6..84b246625 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_tokenization_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "dpk-tokenization-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-tokenization-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system]