From 67612fc5c6aaee1c2f65ca9997cba55dd403828f Mon Sep 17 00:00:00 2001 From: Revital Sur Date: Thu, 3 Oct 2024 05:25:40 -0500 Subject: [PATCH] Cherry pick Boris's 05b97feb7 commit to remove None, which is not supported by kfpV2. Signed-off-by: Revital Sur Co-authored-by: Boris Lublinsky --- transforms/universal/ededup/kfp_ray/ededup_wf.py | 2 +- .../ededup/kfp_ray/src/ededup_compute_execution_params.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index 645902d0e..ff4b4db57 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -89,7 +89,7 @@ def ededup( ededup_hash_cpu: float = 0.5, ededup_doc_column: str = "contents", ededup_use_snapshot: bool = False, - ededup_snapshot_directory: str = None, + ededup_snapshot_directory: str = "", # data sampling ededup_n_samples: int = 10, # additional parameters diff --git a/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py b/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py index 516e4f38c..6f8197877 100644 --- a/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py +++ b/transforms/universal/ededup/kfp_ray/src/ededup_compute_execution_params.py @@ -111,6 +111,9 @@ def ededup_compute_execution_params( print(f"Try to increase the size of the cluster or increase size of the cpu per worker") sys.exit(1) print(f"Projected execution time {EXECUTION_OF_KB_DOC * avg_doc_size * number_of_docs / n_workers / 60} min") + # process None able parameters + if ededup_snapshot_directory is None or len(ededup_snapshot_directory) <= 1: + ededup_snapshot_directory = None return { "data_s3_config": data_s3_config, "data_max_files": data_max_files,