From 5733eb17f9c03902a04d4cc003f71fa0eed2c909 Mon Sep 17 00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Mon, 4 Mar 2024 16:50:19 +0800 Subject: [PATCH 1/6] Optimize levenshtein_distance algorithm in peft_lora_seq2seq_accelerate_ds_zero3_offload.py This commit refines the levenshtein_distance algorithm implemented in peft_lora_seq2seq_accelerate_ds_zero3_offload.py to improve its space complexity from O(n^2) to O(n). Additionally, thorough testing has been conducted to ensure the correctness and reliability of the revised implementation. --- ...lora_seq2seq_accelerate_ds_zero3_offload.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index 8b0e748bdf..dcae6f7935 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -17,23 +17,23 @@ def levenshtein_distance(str1, str2): # TC: O(N^2) - # SC: O(N^2) + # SC: O(N) if str1 == str2: return 0 num_rows = len(str1) + 1 num_cols = len(str2) + 1 - dp_matrix = np.empty((num_rows, num_cols)) - dp_matrix[0, :] = range(num_cols) - dp_matrix[:, 0] = range(num_rows) - + dp_matrix = [i for i in range(num_cols)] for i in range(1, num_rows): + prev = dp_matrix[0] + dp_matrix[0] = i for j in range(1, num_cols): + temp = dp_matrix[j] if str1[i - 1] == str2[j - 1]: - dp_matrix[i, j] = dp_matrix[i - 1, j - 1] + dp_matrix[j] = prev else: - dp_matrix[i, j] = min(dp_matrix[i - 1, j - 1], dp_matrix[i - 1, j], dp_matrix[i, j - 1]) + 1 - - return dp_matrix[num_rows - 1, num_cols - 1] + dp_matrix[j] = min(prev, dp_matrix[j], dp_matrix[j - 1]) + 1 + prev = temp + return dp_matrix[num_cols - 1] def get_closest_label(eval_pred, classes): From 1cf94903e7668bc5bd1dc31b7d52ad0d8e25c841 Mon Sep 17 
00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Wed, 6 Mar 2024 03:41:47 +0800 Subject: [PATCH 2/6] Update peft_lora_clm_accelerate_ds_zero3_offload.py Apply the same O(N)-space levenshtein_distance optimization that was made to peft_lora_seq2seq_accelerate_ds_zero3_offload.py. --- ...eft_lora_clm_accelerate_ds_zero3_offload.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py index 8b4a0af239..a820955685 100644 --- a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py +++ b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py @@ -23,23 +23,23 @@ def levenshtein_distance(str1, str2): # TC: O(N^2) - # SC: O(N^2) + # SC: O(N) if str1 == str2: return 0 num_rows = len(str1) + 1 num_cols = len(str2) + 1 - dp_matrix = np.empty((num_rows, num_cols)) - dp_matrix[0, :] = range(num_cols) - dp_matrix[:, 0] = range(num_rows) - + dp_matrix = [i for i in range(num_cols)] for i in range(1, num_rows): + prev = dp_matrix[0] + dp_matrix[0] = i for j in range(1, num_cols): + temp = dp_matrix[j] if str1[i - 1] == str2[j - 1]: - dp_matrix[i, j] = dp_matrix[i - 1, j - 1] + dp_matrix[j] = prev else: - dp_matrix[i, j] = min(dp_matrix[i - 1, j - 1], dp_matrix[i - 1, j], dp_matrix[i, j - 1]) + 1 - - return dp_matrix[num_rows - 1, num_cols - 1] + dp_matrix[j] = min(prev, dp_matrix[j], dp_matrix[j - 1]) + 1 + prev = temp + return dp_matrix[num_cols - 1] def get_closest_label(eval_pred, classes): From 6067b1ca3ce27187f5a1559a9d8c1342499e8171 Mon Sep 17 00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:17:28 +0800 Subject: [PATCH 3/6] Improve coding style To pass make style. 
--- .../peft_lora_clm_accelerate_ds_zero3_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py index a820955685..453c8be91d 100644 --- a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py +++ b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py @@ -28,7 +28,7 @@ def levenshtein_distance(str1, str2): return 0 num_rows = len(str1) + 1 num_cols = len(str2) + 1 - dp_matrix = [i for i in range(num_cols)] + dp_matrix = list(range(num_cols)) for i in range(1, num_rows): prev = dp_matrix[0] dp_matrix[0] = i From 5ecdc622a347c34cf92075adbde12d2224372d84 Mon Sep 17 00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:18:23 +0800 Subject: [PATCH 4/6] Update peft_lora_seq2seq_accelerate_ds_zero3_offload.py Replace the list comprehension with list(range(num_cols)) to pass make style, matching the change made to peft_lora_clm_accelerate_ds_zero3_offload.py. --- .../peft_lora_seq2seq_accelerate_ds_zero3_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index dcae6f7935..681d1046b6 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -22,7 +22,7 @@ def levenshtein_distance(str1, str2): return 0 num_rows = len(str1) + 1 num_cols = len(str2) + 1 - dp_matrix = [i for i in range(num_cols)] + dp_matrix = list(range(num_cols)) for i in range(1, num_rows): prev = dp_matrix[0] dp_matrix[0] = i From 556bfd9c0228f878d41b41af28d1513181abf4ab Mon Sep 17 00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:59:02 +0800 Subject: [PATCH 5/6] np-- Delete the unused numpy import. --- 
.../peft_lora_clm_accelerate_ds_zero3_offload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py index 453c8be91d..423fba511c 100644 --- a/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py +++ b/examples/causal_language_modeling/peft_lora_clm_accelerate_ds_zero3_offload.py @@ -3,7 +3,6 @@ import sys import threading -import numpy as np import psutil import torch from accelerate import Accelerator From bbb854c04ca208b99c36be3693b445fc9ab8f102 Mon Sep 17 00:00:00 2001 From: TAI-YANG <72863539+SUNGOD3@users.noreply.github.com> Date: Thu, 7 Mar 2024 00:02:35 +0800 Subject: [PATCH 6/6] Update peft_lora_seq2seq_accelerate_ds_zero3_offload.py Delete the unused numpy import, matching the change made to peft_lora_clm_accelerate_ds_zero3_offload.py. --- .../peft_lora_seq2seq_accelerate_ds_zero3_offload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index 681d1046b6..baf902cbbc 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -3,7 +3,6 @@ import sys import threading -import numpy as np import psutil import torch from accelerate import Accelerator