Skip to content

Commit

Permalink
Cherry-pick Boris's 05b97fe commit to remove None, which is not supported by k…
Browse files Browse the repository at this point in the history
…fpV2.

Signed-off-by: Revital Sur <eres@il.ibm.com>
Co-authored-by: Boris Lublinsky <blublinsky@ibm.com>
  • Loading branch information
revit13 and Boris Lublinsky committed Oct 3, 2024
1 parent 0ce13de commit 67612fc
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion transforms/universal/ededup/kfp_ray/ededup_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def ededup(
ededup_hash_cpu: float = 0.5,
ededup_doc_column: str = "contents",
ededup_use_snapshot: bool = False,
ededup_snapshot_directory: str = None,
ededup_snapshot_directory: str = "",
# data sampling
ededup_n_samples: int = 10,
# additional parameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ def ededup_compute_execution_params(
print(f"Try to increase the size of the cluster or increase size of the cpu per worker")
sys.exit(1)
print(f"Projected execution time {EXECUTION_OF_KB_DOC * avg_doc_size * number_of_docs / n_workers / 60} min")
# process nullable parameters: KFP v2 does not support None, so map the empty-string placeholder back to None
if ededup_snapshot_directory is None or len(ededup_snapshot_directory) <= 1:
ededup_snapshot_directory = None
return {
"data_s3_config": data_s3_config,
"data_max_files": data_max_files,
Expand Down

0 comments on commit 67612fc

Please sign in to comment.