From ca8087a472ad4a90c02aa30fd188f282b8a255ff Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 24 Sep 2024 19:03:57 +0200 Subject: [PATCH 01/11] Pull request source branch to cut release 0.2.1 Signed-off-by: Maroun Touma --- .make.versions | 2 +- data-processing-lib/python/pyproject.toml | 2 +- data-processing-lib/ray/pyproject.toml | 4 ++-- data-processing-lib/spark/pyproject.toml | 4 ++-- .../createRayClusterComponent.yaml | 2 +- .../deleteRayClusterComponent.yaml | 2 +- .../executeRayJobComponent.yaml | 2 +- .../executeRayJobComponent_multi_s3.yaml | 2 +- .../executeSubWorkflowComponent.yaml | 2 +- .../kfp_v1_workflow_support/pyproject.toml | 4 ++-- .../kfp_v2_workflow_support/pyproject.toml | 4 ++-- .../shared_workflow_support/pyproject.toml | 4 ++-- .../ray/kfp_v1/superworkflow_code_sample_wf.py | 16 ++++++++-------- .../ray/kfp_v1/superworkflow_dedups_sample_wf.py | 6 +++--- .../code/code2parquet/kfp_ray/code2parquet_wf.py | 4 ++-- .../code/code2parquet/python/pyproject.toml | 4 ++-- transforms/code/code2parquet/ray/pyproject.toml | 6 +++--- .../code/code_quality/kfp_ray/code_quality_wf.py | 4 ++-- .../code/code_quality/python/pyproject.toml | 4 ++-- transforms/code/code_quality/ray/pyproject.toml | 6 +++--- .../kfp_ray/header_cleanser_wf.py | 4 ++-- .../code/header_cleanser/python/pyproject.toml | 4 ++-- .../code/header_cleanser/ray/pyproject.toml | 6 +++--- transforms/code/malware/kfp_ray/malware_wf.py | 4 ++-- transforms/code/malware/python/pyproject.toml | 4 ++-- transforms/code/malware/ray/pyproject.toml | 6 +++--- .../kfp_ray/proglang_select_wf.py | 4 ++-- .../code/proglang_select/python/pyproject.toml | 4 ++-- .../code/proglang_select/ray/pyproject.toml | 6 +++--- .../kfp_ray/repo_level_order_wf.py | 2 +- .../code/repo_level_ordering/ray/pyproject.toml | 4 ++-- .../doc_chunk/kfp_ray/doc_chunk_multiple_wf.py | 4 ++-- .../language/doc_chunk/kfp_ray/doc_chunk_wf.py | 4 ++-- .../language/doc_chunk/python/pyproject.toml | 4 ++-- 
transforms/language/doc_chunk/ray/pyproject.toml | 6 +++--- .../kfp_ray/doc_quality_multiple_wf.py | 4 ++-- .../doc_quality/kfp_ray/doc_quality_wf.py | 4 ++-- .../language/doc_quality/python/pyproject.toml | 4 ++-- .../language/doc_quality/ray/pyproject.toml | 6 +++--- .../lang_id/kfp_ray/lang_id_multiple_wf.py | 4 ++-- .../language/lang_id/kfp_ray/lang_id_wf.py | 4 ++-- .../language/lang_id/python/pyproject.toml | 4 ++-- transforms/language/lang_id/ray/pyproject.toml | 6 +++--- .../kfp_ray/pdf2parquet_multiple_wf.py | 4 ++-- .../pdf2parquet/kfp_ray/pdf2parquet_wf.py | 4 ++-- .../language/pdf2parquet/python/pyproject.toml | 4 ++-- .../language/pdf2parquet/ray/pyproject.toml | 6 +++--- .../pii_redactor/kfp_ray/pii_redactor_wf.py | 2 +- .../language/pii_redactor/python/pyproject.toml | 4 ++-- .../language/pii_redactor/ray/pyproject.toml | 6 +++--- .../kfp_ray/text_encoder_multiple_wf.py | 4 ++-- .../text_encoder/kfp_ray/text_encoder_wf.py | 4 ++-- .../language/text_encoder/python/pyproject.toml | 4 ++-- .../language/text_encoder/ray/pyproject.toml | 6 +++--- transforms/packaging/python/pyproject.toml | 2 +- transforms/packaging/python/requirements.txt | 2 +- transforms/packaging/ray/pyproject.toml | 2 +- transforms/packaging/ray/requirements.txt | 4 ++-- transforms/universal/doc_id/kfp_ray/doc_id_wf.py | 4 ++-- .../universal/doc_id/python/pyproject.toml | 4 ++-- transforms/universal/doc_id/ray/pyproject.toml | 6 +++--- transforms/universal/doc_id/spark/pyproject.toml | 4 ++-- transforms/universal/ededup/kfp_ray/ededup_wf.py | 4 ++-- .../universal/ededup/python/pyproject.toml | 4 ++-- transforms/universal/ededup/ray/pyproject.toml | 6 +++--- transforms/universal/fdedup/kfp_ray/fdedup_wf.py | 4 ++-- transforms/universal/fdedup/ray/pyproject.toml | 4 ++-- transforms/universal/filter/kfp_ray/filter_wf.py | 4 ++-- .../universal/filter/python/pyproject.toml | 4 ++-- transforms/universal/filter/ray/pyproject.toml | 6 +++--- transforms/universal/filter/spark/pyproject.toml | 
4 ++-- .../universal/html2parquet/python/pyproject.toml | 4 ++-- .../universal/noop/kfp_ray/noop_multiple_wf.py | 4 ++-- transforms/universal/noop/kfp_ray/noop_wf.py | 4 ++-- transforms/universal/noop/python/pyproject.toml | 4 ++-- transforms/universal/noop/ray/pyproject.toml | 6 +++--- transforms/universal/noop/spark/pyproject.toml | 6 +++--- .../universal/profiler/kfp_ray/profiler_wf.py | 4 ++-- transforms/universal/profiler/ray/pyproject.toml | 4 ++-- transforms/universal/resize/kfp_ray/resize_wf.py | 4 ++-- .../universal/resize/python/pyproject.toml | 4 ++-- transforms/universal/resize/ray/pyproject.toml | 6 +++--- .../tokenization/kfp_ray/tokenization_wf.py | 4 ++-- .../universal/tokenization/python/pyproject.toml | 4 ++-- .../universal/tokenization/ray/pyproject.toml | 6 +++--- 85 files changed, 183 insertions(+), 183 deletions(-) diff --git a/.make.versions b/.make.versions index 54e6d8ca1..662782452 100644 --- a/.make.versions +++ b/.make.versions @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2 DPK_MICRO_VERSION=1 # The suffix is generally always set in the main/development branch and only nulled out when creating release branches. # It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi. 
-DPK_VERSION_SUFFIX=.dev3 +DPK_VERSION_SUFFIX= DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) diff --git a/data-processing-lib/python/pyproject.toml b/data-processing-lib/python/pyproject.toml index 9ff6c2d7f..0c6ec36f8 100644 --- a/data-processing-lib/python/pyproject.toml +++ b/data-processing-lib/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Library" diff --git a/data-processing-lib/ray/pyproject.toml b/data-processing-lib/ray/pyproject.toml index 3f347cdf4..404e1b71b 100644 --- a/data-processing-lib/ray/pyproject.toml +++ b/data-processing-lib/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_ray" -version = "0.2.1.dev3" +version = "0.2.1" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] requires-python = ">=3.10" description = "Data Preparation Toolkit Library for Ray" @@ -11,7 +11,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit>=0.2.1.dev3", + "data-prep-toolkit>=0.2.1", "ray[default]==2.24.0", # These two are to fix security issues identified by quay.io "fastapi>=0.110.2", diff --git a/data-processing-lib/spark/pyproject.toml b/data-processing-lib/spark/pyproject.toml index b6e9edddb..7514f0a50 100644 --- a/data-processing-lib/spark/pyproject.toml +++ b/data-processing-lib/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_spark" -version = "0.2.1.dev3" +version = "0.2.1" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] requires-python = ">=3.10" description = "Data Preparation Toolkit Library for Spark" @@ -11,7 +11,7 
@@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "pyspark>=3.5.2", "psutil>=6.0.0" ] diff --git a/kfp/kfp_ray_components/createRayClusterComponent.yaml b/kfp/kfp_ray_components/createRayClusterComponent.yaml index 30b0b66d8..36e88d978 100644 --- a/kfp/kfp_ray_components/createRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/createRayClusterComponent.yaml @@ -11,7 +11,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml index 44e199c47..3c3e79b9c 100644 --- a/kfp/kfp_ray_components/deleteRayClusterComponent.yaml +++ b/kfp/kfp_ray_components/deleteRayClusterComponent.yaml @@ -9,7 +9,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. 
diff --git a/kfp/kfp_ray_components/executeRayJobComponent.yaml b/kfp/kfp_ray_components/executeRayJobComponent.yaml index 7ab517bff..4dccd78d0 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent.yaml @@ -12,7 +12,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml index 9b98912f0..0b1d4ecef 100644 --- a/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml +++ b/kfp/kfp_ray_components/executeRayJobComponent_multi_s3.yaml @@ -13,7 +13,7 @@ inputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). # The YAML language has two syntaxes for lists and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml index 6b261a003..335713462 100644 --- a/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml +++ b/kfp/kfp_ray_components/executeSubWorkflowComponent.yaml @@ -27,7 +27,7 @@ outputs: implementation: container: - image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" + image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # command is a list of strings (command-line arguments). 
# The YAML language has two syntaxes for lists, and you can use either of them. # Here we use the "flow syntax" - comma-separated strings inside square brackets. diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml index eaea5fb0d..a4291b093 100644 --- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v1" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} @@ -13,7 +13,7 @@ authors = [ ] dependencies = [ "kfp==1.8.22", - "data-prep-toolkit-kfp-shared==0.2.1.dev3", + "data-prep-toolkit-kfp-shared==0.2.1", ] [build-system] diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml index c5ca32f1a..866ca157d 100644 --- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_v2" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. 
KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "kfp==2.8.0", "kfp-kubernetes==1.2.0", - "data-prep-toolkit-kfp-shared==0.2.1.dev3", + "data-prep-toolkit-kfp-shared==0.2.1", ] [build-system] diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml index b4f509433..a0fa9e760 100644 --- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_kfp_shared" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} @@ -14,7 +14,7 @@ authors = [ dependencies = [ "requests", "kubernetes", - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py index 867c83198..a35a9fbf6 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py @@ -17,14 +17,14 @@ run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_tokenization_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -code_to_parquet_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:latest" -proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest" -code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest" -malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest" -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" 
-tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest" +code_to_parquet_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.1" +proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.1" +code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.1" +malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.1" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" +tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.1" # Pipeline to invoke execution on remote resource diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py index 8243a65b5..947aece29 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py @@ -12,9 +12,9 @@ run_exact_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" # Pipeline to invoke execution on remote resource @dsl.pipeline( diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py index a2080e70a..68fe95384 100644 --- a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py +++ 
b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py @@ -21,11 +21,11 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "code2parquet_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index 79f0988be..b6de1913d 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "code2parquet Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "parameterized", "pandas", ] diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index c7f1a1563..eaff67e7f 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", - "dpk-code2parquet-transform-python==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", + 
"dpk-code2parquet-transform-python==0.2.1", "parameterized", "pandas", ] diff --git a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py index 138b5d613..cc2b424e1 100644 --- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py +++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "code_quality_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index 88c8f9031..a1d7145c8 100644 --- a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "bs4==0.0.2", "transformers==4.38.2", ] diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 6925f45c0..8350e6ca9 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Code Quality Ray Transform" 
license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-code-quality-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-code-quality-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py index ba82169c3..426e5fb91 100644 --- a/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py +++ b/transforms/code/header_cleanser/kfp_ray/header_cleanser_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "header_cleanser_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml index 2799974b4..7f7d69481 100644 --- a/transforms/code/header_cleanser/python/pyproject.toml +++ b/transforms/code/header_cleanser/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "License and Copyright Removal Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "scancode-toolkit==32.1.0", ] diff --git a/transforms/code/header_cleanser/ray/pyproject.toml 
b/transforms/code/header_cleanser/ray/pyproject.toml index d40aa9373..e1736ccda 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_header_cleanser_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "License and copyright removal Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" }, ] dependencies = [ - "dpk-header-cleanser-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-header-cleanser-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", "scancode-toolkit==32.1.0", ] diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py index d9ec70b37..5d7b4e5c5 100644 --- a/transforms/code/malware/kfp_ray/malware_wf.py +++ b/transforms/code/malware/kfp_ray/malware_wf.py @@ -21,10 +21,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "malware_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 9e5e122ca..2577e5c2e 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Malware Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = 
"Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "clamd==1.0.2", ] diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index 60d9a3089..b0f2465f2 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Malware Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "dpk-malware-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-malware-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py index 209121cd4..f9f2c4a62 100644 --- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py +++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py @@ -21,10 +21,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "proglang_select_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 7fcef9bfc..5600d447f 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -1,6 +1,6 @@ 
[project] name = "dpk_proglang_select_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Programming Language Selection Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 703bf5279..0f7ef7093 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-proglang-select-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-proglang-select-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py index 256636176..c91963bec 100644 --- a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py +++ b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "repo_level_order_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml 
b/transforms/code/repo_level_ordering/ray/pyproject.toml index 6f54a65ed..1fd393826 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_repo_level_order_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "repo_level_order Ray Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Shanmukha Guttula", email = "shagutt1@in.ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "networkx==3.3", "colorlog==6.8.2", "func-timeout==4.3.5", diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py index a613955c9..eb76a8ec6 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py index 7fb107758..313476604 100644 --- a/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py +++ b/transforms/language/doc_chunk/kfp_ray/doc_chunk_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = 
"quay.io/dataprep1/data-prep-kit/doc_chunk-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_chunk-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_chunk_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_chunk/python/pyproject.toml b/transforms/language/doc_chunk/python/pyproject.toml index 4deb09d47..570d65207 100644 --- a/transforms/language/doc_chunk/python/pyproject.toml +++ b/transforms/language/doc_chunk/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_chunk_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "chunk documents Python Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "docling-core==1.3.0", "llama-index-core>=0.11.0,<0.12.0", ] diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml index 19288e2db..27a2e2c31 100644 --- a/transforms/language/doc_chunk/ray/pyproject.toml +++ b/transforms/language/doc_chunk/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_chunk_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "chunk documents Ray Transform" license = {text = "Apache-2.0"} @@ -11,8 +11,8 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "dpk-doc-chunk-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-doc-chunk-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git 
a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py index c68715b5d..75ebde0d1 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py index b42262468..e334e0d13 100644 --- a/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py +++ b/transforms/language/doc_quality/kfp_ray/doc_quality_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_quality-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_quality_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml index e63a6d5e5..1e1da5ec9 100644 --- 
a/transforms/language/doc_quality/python/pyproject.toml +++ b/transforms/language/doc_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Document Quality Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index 6bc9cc6c6..9950a38be 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_quality_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Document Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-doc_quality-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3" + "dpk-doc_quality-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1" ] [build-system] diff --git a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py index ecd58b6fe..3696cead6 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" 
+base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py index 4f581cf2c..01e041928 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index f2dd72919..a1d023fef 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "fasttext==0.9.2", "langcodes==3.3.0", "huggingface-hub >= 0.21.4, <1.0.0", diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index 4833913a4..dd319ea70 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ 
b/transforms/language/lang_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-lang_id-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-lang_id-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py index fdfbc1fe4..824facde4 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py index c3bf399fe..a3bf50759 100644 --- a/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py +++ b/transforms/language/pdf2parquet/kfp_ray/pdf2parquet_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:latest" +task_image = 
"quay.io/dataprep1/data-prep-kit/pdf2parquet-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "pdf2parquet_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pdf2parquet/python/pyproject.toml b/transforms/language/pdf2parquet/python/pyproject.toml index 24f2294b5..89f272bed 100644 --- a/transforms/language/pdf2parquet/python/pyproject.toml +++ b/transforms/language/pdf2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pdf2parquet_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PDF2PARQUET Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "docling-core==1.2.0", "docling-ibm-models==1.1.7", "deepsearch-glm==0.21.0", diff --git a/transforms/language/pdf2parquet/ray/pyproject.toml b/transforms/language/pdf2parquet/ray/pyproject.toml index 950e5ce3d..6846e37f4 100644 --- a/transforms/language/pdf2parquet/ray/pyproject.toml +++ b/transforms/language/pdf2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pdf2parquet_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PDF2PARQUET Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Christoph Auer", email = "cau@zurich.ibm.com" }, ] dependencies = [ - "dpk-pdf2parquet-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-pdf2parquet-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py 
b/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py index f1c4dac98..381865cf3 100644 --- a/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py +++ b/transforms/language/pii_redactor/kfp_ray/pii_redactor_wf.py @@ -23,7 +23,7 @@ EXEC_SCRIPT_NAME: str = "pii_redactor_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml index a61987a45..91e94d28d 100644 --- a/transforms/language/pii_redactor/python/pyproject.toml +++ b/transforms/language/pii_redactor/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PII redactor Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Sowmya.L.R", email = "lrsowmya@gmail.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index a1b01be94..82d6f87f1 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_pii_redactor_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "PII Redactor Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk_pii_redactor_transform_python==0.2.1.dev3", - 
"data-prep-toolkit-ray==0.2.1.dev3", + "dpk_pii_redactor_transform_python==0.2.1", + "data-prep-toolkit-ray==0.2.1", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py index 120c53c99..1a4eb864f 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_multiple_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py index d402c8832..0a3627b86 100644 --- a/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py +++ b/transforms/language/text_encoder/kfp_ray/text_encoder_wf.py @@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/text_encoder-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "text_encoder_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files 
component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml index 1ed8725ab..e7cdf8a26 100644 --- a/transforms/language/text_encoder/python/pyproject.toml +++ b/transforms/language/text_encoder/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Text Encoder Python Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ { name = "Peter Staar", email = "taa@zurich.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "sentence-transformers==3.0.1", ] diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml index aa8af8b44..33b5e3a94 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_text_encoder_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Text Encoder Ray Transform" license = {text = "Apache-2.0"} @@ -11,8 +11,8 @@ authors = [ { name = "Peter Staar", email = "taa@zurich.ibm.com" }, ] dependencies = [ - "dpk-text_encoder-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-text_encoder-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/packaging/python/pyproject.toml b/transforms/packaging/python/pyproject.toml index 5ddb40aae..fdea22080 100644 --- a/transforms/packaging/python/pyproject.toml +++ b/transforms/packaging/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_transforms" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", 
"ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms" diff --git a/transforms/packaging/python/requirements.txt b/transforms/packaging/python/requirements.txt index 6dec1e2de..e4ad2694a 100644 --- a/transforms/packaging/python/requirements.txt +++ b/transforms/packaging/python/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit>=0.2.1.dev3 +data-prep-toolkit>=0.2.1 bs4==0.0.2 #pdf2parquet # conflict with chunking.... diff --git a/transforms/packaging/ray/pyproject.toml b/transforms/packaging/ray/pyproject.toml index 9c1509472..971bb5ef9 100644 --- a/transforms/packaging/ray/pyproject.toml +++ b/transforms/packaging/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "data_prep_toolkit_transforms_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10,<3.12" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms using Ray" diff --git a/transforms/packaging/ray/requirements.txt b/transforms/packaging/ray/requirements.txt index 2e75ae185..fed96622b 100644 --- a/transforms/packaging/ray/requirements.txt +++ b/transforms/packaging/ray/requirements.txt @@ -1,5 +1,5 @@ -data-prep-toolkit-ray>=0.2.1.dev3 -data-prep-toolkit-transforms>=0.2.1.dev3 +data-prep-toolkit-ray>=0.2.1 +data-prep-toolkit-transforms>=0.2.1 scancode-toolkit==32.1.0 ; platform_system != 'Darwin' parameterized tqdm==4.66.3 diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py index 1eb96af25..5d381ab47 100644 --- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py +++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py @@ -17,12 +17,12 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.1" # the name of the job script 
EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/doc_id/python/pyproject.toml b/transforms/universal/doc_id/python/pyproject.toml index 8e4358b28..1e822549b 100644 --- a/transforms/universal/doc_id/python/pyproject.toml +++ b/transforms/universal/doc_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3" + "data-prep-toolkit==0.2.1" ] [build-system] diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml index e5cb79d95..c8d495d0e 100644 --- a/transforms/universal/doc_id/ray/pyproject.toml +++ b/transforms/universal/doc_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_doc_id_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "docid Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk_doc_id_transform_python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3" + "dpk_doc_id_transform_python==0.2.1", + "data-prep-toolkit-ray==0.2.1" ] [build-system] diff --git a/transforms/universal/doc_id/spark/pyproject.toml b/transforms/universal/doc_id/spark/pyproject.toml index 13d7bc2c3..f15c84305 100644 --- a/transforms/universal/doc_id/spark/pyproject.toml +++ b/transforms/universal/doc_id/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = 
"dpk_doc_id_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Doc ID Spark Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-spark==0.2.1.dev3", + "data-prep-toolkit-spark==0.2.1", ] [build-system] diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index 306391d6c..ed80a2084 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/ededup/python/pyproject.toml b/transforms/universal/ededup/python/pyproject.toml index e380bf58e..119caaa76 100644 --- a/transforms/universal/ededup/python/pyproject.toml +++ b/transforms/universal/ededup/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", ] diff --git a/transforms/universal/ededup/ray/pyproject.toml 
b/transforms/universal/ededup/ray/pyproject.toml index 2fdf82392..0b444de24 100644 --- a/transforms/universal/ededup/ray/pyproject.toml +++ b/transforms/universal/ededup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_ededup_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "ededup Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", - "dpk_ededup_transform_python==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", + "dpk_ededup_transform_python==0.2.1", "tqdm==4.66.3", ] diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py index c98ffafa3..2937d2bdb 100644 --- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py +++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "fdedup_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml index 70f92a23f..95ce1a883 100644 --- a/transforms/universal/fdedup/ray/pyproject.toml +++ b/transforms/universal/fdedup/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_fdedup_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "fdedup Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ 
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", "tqdm==4.66.3", diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py index b998cd7b5..ac87d1d7c 100644 --- a/transforms/universal/filter/kfp_ray/filter_wf.py +++ b/transforms/universal/filter/kfp_ray/filter_wf.py @@ -21,10 +21,10 @@ EXEC_SCRIPT_NAME: str = "filter_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.2.1" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index 995247f4f..117ba6023 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Filter Transform for Python" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "duckdb==0.10.1", ] diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index fc0035475..1dd1efeee 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ b/transforms/universal/filter/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" 
description = "Filter Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "dpk-filter-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-filter-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/filter/spark/pyproject.toml b/transforms/universal/filter/spark/pyproject.toml index 4d31c2ef2..f82807e9a 100644 --- a/transforms/universal/filter/spark/pyproject.toml +++ b/transforms/universal/filter/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Filter Spark Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-spark==0.2.1.dev3", + "data-prep-toolkit-spark==0.2.1", ] [project.optional-dependencies] diff --git a/transforms/universal/html2parquet/python/pyproject.toml b/transforms/universal/html2parquet/python/pyproject.toml index f49c498d6..de6b606f4 100644 --- a/transforms/universal/html2parquet/python/pyproject.toml +++ b/transforms/universal/html2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_html2parquet_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "HTML2PARQUET Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Syed Zawad", email = "szawad@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "trafilatura==1.12.0" ] diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py index a1f6592a8..2781153c1 100644 --- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py 
@@ -17,13 +17,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py index 67405f134..ebeff5f33 100644 --- a/transforms/universal/noop/kfp_ray/noop_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index 5714e70de..8f2ec097e 100644 --- a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - 
"data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 9f1353b4e..e6d9cbddb 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-noop-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index 965770d92..8ee6b7f54 100644 --- a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_spark" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "NOOP Spark Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsk@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.2.1.dev3", - "data-prep-toolkit-spark==0.2.1.dev3", + "dpk-noop-transform-python==0.2.1", + "data-prep-toolkit-spark==0.2.1", ] [build-system] diff --git a/transforms/universal/profiler/kfp_ray/profiler_wf.py b/transforms/universal/profiler/kfp_ray/profiler_wf.py index 7f21fa3e0..3d14a59a8 100644 --- a/transforms/universal/profiler/kfp_ray/profiler_wf.py +++ b/transforms/universal/profiler/kfp_ray/profiler_wf.py @@ -18,13 +18,13 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:latest" +task_image = 
"quay.io/dataprep1/data-prep-kit/profiler-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml index 1473b88b4..555b74b2a 100644 --- a/transforms/universal/profiler/ray/pyproject.toml +++ b/transforms/universal/profiler/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_profiler_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "profiler Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.1.dev3", + "data-prep-toolkit-ray==0.2.1", "mmh3==4.1.0", "xxhash==3.4.1", "tqdm==4.66.3", diff --git a/transforms/universal/resize/kfp_ray/resize_wf.py b/transforms/universal/resize/kfp_ray/resize_wf.py index f9b325674..4b95ff774 100644 --- a/transforms/universal/resize/kfp_ray/resize_wf.py +++ b/transforms/universal/resize/kfp_ray/resize_wf.py @@ -17,12 +17,12 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/resize-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/resize-ray:0.2.1" # the name of the job script EXEC_SCRIPT_NAME: str = "resize_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" diff --git 
a/transforms/universal/resize/python/pyproject.toml b/transforms/universal/resize/python/pyproject.toml index b1cc13314..b1abb776a 100644 --- a/transforms/universal/resize/python/pyproject.toml +++ b/transforms/universal/resize/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_python" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "resize Python Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", ] [build-system] diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml index 86834c1b1..9a9f2afc0 100644 --- a/transforms/universal/resize/ray/pyproject.toml +++ b/transforms/universal/resize/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_resize_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Resize Ray Transform" license = {text = "Apache-2.0"} @@ -10,8 +10,8 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-resize-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-resize-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py index ba96a790a..8cceb9b0e 100644 --- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py +++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py @@ -20,10 +20,10 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "tokenization_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:latest" +task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.1" # components -base_kfp_image = 
"quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.1" # path to kfp component specifications files # path to kfp component specifications files diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index 1dc0ca104..68bd2d9bc 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_python" keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "data-prep-toolkit==0.2.1.dev3", + "data-prep-toolkit==0.2.1", "transformers==4.38.2", ] diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index fd259a9b6..84b246625 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_tokenization_transform_ray" -version = "0.2.1.dev3" +version = "0.2.1" requires-python = ">=3.10" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} @@ -9,8 +9,8 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "dpk-tokenization-transform-python==0.2.1.dev3", - "data-prep-toolkit-ray==0.2.1.dev3", + "dpk-tokenization-transform-python==0.2.1", + "data-prep-toolkit-ray==0.2.1", ] [build-system] From 41ca0691b1c0e1ec9abf13722b42f611b20d9e3d Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 24 Sep 2024 23:20:13 +0200 Subject: [PATCH 
02/11] Updated release notes for v0.2.1 Signed-off-by: Maroun Touma --- release-notes.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/release-notes.md b/release-notes.md index 52aca0930..7bd7b47f3 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,41 @@ # Data Prep Kit Release notes +## Release 0.2.1 - 9/24/2024 + +### General +1. Bug fixes across the repo +1. Added AI Alliance RAG demo, tutorials and notebooks and tips for running on google colab +1. Added new transforms and single package for transforms published to pypi +1. improved CI/CD with targeted workflow triggered on specific changes to specific modules +1. New enhancements for cutting a release + + +### data-prep-toolkit libraries (python, ray, spark) + +1. Restructure the repository to distinguish/separate runtime libraries +1. Split data-processing-lib/ray into python and ray +1. Spark runtime +1. updated pyarrow version +1. define required transform() method as abstract to AbstractTableTransform +1. Enables configuration of makefile to use src or pypi for data-prep-kit library dependencies + + +### KFP Workloads + +1. Update kfp image version +1. Enable kfp in GH action for testing randomly selected workflow and prevent kfp test for transforms that do not support it +1. Auto generate kfp pipelines +1. Combine the common KFP support code in a shared library +1. Update K8s cluster deployment and remove creation of clusterrolebinding in kubeflow installation + + +### Transforms + +1. Added 7 new transforms including: language identification, profiler, repo level ordering, doc quality, pdf2parquet, HTML2Parquet and PII Transform +1. Added ededup python implementation and incremental ededup +1.
Added fuzzy floating point comparison + + ## Release 0.2.0 - 6/27/2024 ### General From b74e6b686b26cef7a3140775afd6e38e71f83409 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 00:04:29 +0200 Subject: [PATCH 03/11] fix docker tag to match release Signed-off-by: Maroun Touma --- transforms/universal/doc_id/spark/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index 2b529de8d..42512d7e8 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 +ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 FROM ${BASE_IMAGE} USER root From 25997102889cf51c9e945fa20b61018246593deb Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 00:07:38 +0200 Subject: [PATCH 04/11] fix release tag for spark image Signed-off-by: Maroun Touma --- transforms/universal/filter/spark/Dockerfile | 2 +- transforms/universal/noop/spark/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index 92eebbee8..b44e0dae6 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 +ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index e72cb06ae..b7c61a07a 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1.dev0 
+ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 FROM ${BASE_IMAGE} USER root From 5c8bd5d1866cf243bfa59c5bb9e3b83417d510ef Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 07:49:12 +0200 Subject: [PATCH 05/11] fix tag for latest spark image Signed-off-by: Maroun Touma --- transforms/universal/doc_id/spark/Dockerfile | 2 +- transforms/universal/filter/spark/Dockerfile | 2 +- transforms/universal/noop/spark/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index 42512d7e8..dc8f5faf3 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 +ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index b44e0dae6..8131c8d07 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 +ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index b7c61a07a..deb54f974 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:0.2.1 +ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root From c8333ff3050b7955941d768d127fea4b70f89c8d Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 13:18:44 +0200 Subject: 
[PATCH 06/11] Missing base image in quay.io Signed-off-by: Maroun Touma --- release-notes.md | 13 ++++--------- transforms/universal/doc_id/spark/Dockerfile | 3 ++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/release-notes.md b/release-notes.md index 7bd7b47f3..8d114b8ff 100644 --- a/release-notes.md +++ b/release-notes.md @@ -6,7 +6,7 @@ 1. Bug fixes across the repo 1. Added AI Alliance RAG demo, tutorials and notebooks and tips for running on google colab 1. Added new transforms and single package for transforms published to pypi -1. improved CI/CD with targeted workflow triggered on specific changes to specific modules +1. Improved CI/CD with targeted workflow triggered on specific changes to specific modules 1. New enhancements for cutting a release @@ -15,19 +15,14 @@ 1. Restructure the repository to distinguish/separate runtime libraries 1. Split data-processing-lib/ray into python and ray 1. Spark runtime -1. updated pyarrow version -1. define required transform() method as abstract to AbstractTableTransform +1. Updated pyarrow version +1. Define required transform() method as abstract to AbstractTableTransform 1. Enables configuration of makefile to use src or pypi for data-prep-kit library dependencies ### KFP Workloads -1. Update kfp image version -1. Enable kfp in GH action for testing randomly selected workflow and prevent kfp test for transforms that do not support it -1. Auto generate kfp pipelines -1. Combine the common KFP support code in a shared library -1. Update K8s cluster deployment and remove creation of clusterrolebinding in kubeflow installation - +1. Add a configurable timeout before destroying the deployed Ray cluster. 
### Transforms diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index dc8f5faf3..861146cab 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,4 +1,5 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest +#ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest FROM ${BASE_IMAGE} USER root From 8a794cbb9bac7a2c6308ecfefd0eaceef38f68f9 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 13:20:49 +0200 Subject: [PATCH 07/11] missing base image in quay.io Signed-off-by: Maroun Touma --- transforms/universal/filter/spark/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index 8131c8d07..f6dea8858 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,4 +1,5 @@ ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:0.2.1 FROM ${BASE_IMAGE} USER root From 28e73a228f98fa6b9082787a63378ca3ebd293b1 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 14:23:00 +0200 Subject: [PATCH 08/11] use image from cache Signed-off-by: Maroun Touma --- transforms/universal/doc_id/spark/Dockerfile | 5 ++--- transforms/universal/filter/spark/Dockerfile | 5 ++--- transforms/universal/noop/spark/Dockerfile | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index 861146cab..7176a9c30 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,6 +1,5 @@ -#ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest -ARG 
BASE_IMAGE=data-prep-kit-spark-3.5.2:latest -FROM ${BASE_IMAGE} +ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${SPARK_BASE_IMAGE} USER root # install pytest diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index f6dea8858..12ebb4b95 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,6 +1,5 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest -ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:0.2.1 -FROM ${BASE_IMAGE} +ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${SPARK_BASE_IMAGE} USER root # install pytest diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index deb54f974..94ba896a8 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -1,5 +1,5 @@ -ARG BASE_IMAGE=quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest -FROM ${BASE_IMAGE} +ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${SPARK_BASE_IMAGE} USER root # install pytest From df0e27b89c88196603b117a6051aafa340a0f446 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 16:01:04 +0200 Subject: [PATCH 09/11] Build pckages for the release using src folder only (no test) Signed-off-by: Maroun Touma --- data-processing-lib/python/pyproject.toml | 2 +- data-processing-lib/ray/pyproject.toml | 2 +- transforms/code/code2parquet/python/pyproject.toml | 2 +- transforms/code/code2parquet/ray/pyproject.toml | 2 +- transforms/code/code_quality/python/pyproject.toml | 2 +- transforms/code/code_quality/ray/pyproject.toml | 2 +- transforms/code/header_cleanser/python/pyproject.toml | 2 +- transforms/code/header_cleanser/ray/pyproject.toml | 2 +- transforms/code/malware/python/pyproject.toml | 2 +- transforms/code/malware/ray/pyproject.toml | 2 +- transforms/code/proglang_select/python/pyproject.toml | 2 +- 
transforms/code/proglang_select/ray/pyproject.toml | 2 +- transforms/code/repo_level_ordering/ray/pyproject.toml | 2 +- transforms/language/doc_chunk/python/pyproject.toml | 2 +- transforms/language/doc_chunk/ray/pyproject.toml | 2 +- transforms/language/doc_quality/python/pyproject.toml | 2 +- transforms/language/doc_quality/ray/pyproject.toml | 2 +- transforms/language/lang_id/python/pyproject.toml | 2 +- transforms/language/lang_id/ray/pyproject.toml | 2 +- transforms/language/pdf2parquet/python/pyproject.toml | 2 +- transforms/language/pdf2parquet/ray/pyproject.toml | 2 +- transforms/language/pii_redactor/python/pyproject.toml | 2 +- transforms/language/pii_redactor/ray/pyproject.toml | 2 +- transforms/language/text_encoder/python/pyproject.toml | 2 +- transforms/language/text_encoder/ray/pyproject.toml | 2 +- transforms/universal/noop/spark/Dockerfile | 4 ++-- 26 files changed, 27 insertions(+), 27 deletions(-) diff --git a/data-processing-lib/python/pyproject.toml b/data-processing-lib/python/pyproject.toml index 0c6ec36f8..d95cb3075 100644 --- a/data-processing-lib/python/pyproject.toml +++ b/data-processing-lib/python/pyproject.toml @@ -41,7 +41,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/data_processing"] diff --git a/data-processing-lib/ray/pyproject.toml b/data-processing-lib/ray/pyproject.toml index 404e1b71b..c67ad8a9c 100644 --- a/data-processing-lib/ray/pyproject.toml +++ b/data-processing-lib/ray/pyproject.toml @@ -42,7 +42,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/data_processing_ray"] diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index b6de1913d..c849a0de5 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = 
["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index eaff67e7f..21c34ec32 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index a1d7145c8..58c0ed6eb 100644 --- a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 8350e6ca9..e62dfed02 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml index 7f7d69481..14b609854 100644 --- a/transforms/code/header_cleanser/python/pyproject.toml +++ b/transforms/code/header_cleanser/python/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml index e1736ccda..8c05f2099 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] 
+package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 2577e5c2e..707c3a106 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index b0f2465f2..8e221d732 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 5600d447f..da05b8acf 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -30,7 +30,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 0f7ef7093..ae778f40d 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml b/transforms/code/repo_level_ordering/ray/pyproject.toml index 1fd393826..7cc8bdb41 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -37,7 +37,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] 
[options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_chunk/python/pyproject.toml b/transforms/language/doc_chunk/python/pyproject.toml index 570d65207..ddcadba5b 100644 --- a/transforms/language/doc_chunk/python/pyproject.toml +++ b/transforms/language/doc_chunk/python/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml index 27a2e2c31..76e368c27 100644 --- a/transforms/language/doc_chunk/ray/pyproject.toml +++ b/transforms/language/doc_chunk/ray/pyproject.toml @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml index 1e1da5ec9..c8d9fa11c 100644 --- a/transforms/language/doc_quality/python/pyproject.toml +++ b/transforms/language/doc_quality/python/pyproject.toml @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index 9950a38be..4d642384e 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index a1d023fef..240dbdd49 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] 
[options.packages.find] where = ["src/"] diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index dd319ea70..be96eea96 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -31,7 +31,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pdf2parquet/python/pyproject.toml b/transforms/language/pdf2parquet/python/pyproject.toml index 89f272bed..bc8f29c52 100644 --- a/transforms/language/pdf2parquet/python/pyproject.toml +++ b/transforms/language/pdf2parquet/python/pyproject.toml @@ -36,7 +36,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pdf2parquet/ray/pyproject.toml b/transforms/language/pdf2parquet/ray/pyproject.toml index 6846e37f4..57deeedc1 100644 --- a/transforms/language/pdf2parquet/ray/pyproject.toml +++ b/transforms/language/pdf2parquet/ray/pyproject.toml @@ -32,7 +32,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml index 91e94d28d..7931e1ece 100644 --- a/transforms/language/pii_redactor/python/pyproject.toml +++ b/transforms/language/pii_redactor/python/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index 82d6f87f1..a6900b23d 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -36,7 +36,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = 
["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml index e7cdf8a26..615cff94b 100644 --- a/transforms/language/text_encoder/python/pyproject.toml +++ b/transforms/language/text_encoder/python/pyproject.toml @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml index 33b5e3a94..cd9580692 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -33,7 +33,7 @@ dev = [ ] [options] -package_dir = ["src","test"] +package_dir = ["src"] [options.packages.find] where = ["src/"] diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index 94ba896a8..47c7134b3 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -1,5 +1,5 @@ -ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest -FROM ${SPARK_BASE_IMAGE} +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${BASE_IMAGE} USER root # install pytest From 44ce92dfb9129b1fe92d0001f36f6b6bc81cbd4c Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 25 Sep 2024 12:31:04 -0400 Subject: [PATCH 10/11] fix base spark image tag to use .make.versions instead of hardcoded to latest Signed-off-by: David Wood --- data-processing-lib/spark/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/data-processing-lib/spark/Makefile b/data-processing-lib/spark/Makefile index d4d01ba74..51290ccf5 100644 --- a/data-processing-lib/spark/Makefile +++ b/data-processing-lib/spark/Makefile @@ -4,7 +4,6 @@ include $(REPOROOT)/.make.defaults SPARK_VERSION=3.5.2 DOCKER_IMAGE_NAME=data-prep-kit-spark-$(SPARK_VERSION) DOCKER_IMAGE_LIB_NAME=data-prep-kit-spark 
-DOCKER_IMAGE_VERSION := latest .check-env:: From 446b45a65963a43f425614fe9445a2955a5e2c02 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 25 Sep 2024 19:40:13 +0200 Subject: [PATCH 11/11] redoing fix after breakeage with patch added by David Signed-off-by: Maroun Touma --- transforms/universal/doc_id/spark/Dockerfile | 4 ++-- transforms/universal/filter/spark/Dockerfile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/transforms/universal/doc_id/spark/Dockerfile b/transforms/universal/doc_id/spark/Dockerfile index 7176a9c30..6dd3cb63d 100644 --- a/transforms/universal/doc_id/spark/Dockerfile +++ b/transforms/universal/doc_id/spark/Dockerfile @@ -1,5 +1,5 @@ -ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest -FROM ${SPARK_BASE_IMAGE} +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${BASE_IMAGE} USER root # install pytest diff --git a/transforms/universal/filter/spark/Dockerfile b/transforms/universal/filter/spark/Dockerfile index 12ebb4b95..75bd07aad 100644 --- a/transforms/universal/filter/spark/Dockerfile +++ b/transforms/universal/filter/spark/Dockerfile @@ -1,5 +1,5 @@ -ARG SPARK_BASE_IMAGE=data-prep-kit-spark-3.5.2:latest -FROM ${SPARK_BASE_IMAGE} +ARG BASE_IMAGE=data-prep-kit-spark-3.5.2:latest +FROM ${BASE_IMAGE} USER root # install pytest