Skip to content

Commit

Permalink
Optimize levenshtein algorithm in scripts (#1527)
Browse files Browse the repository at this point in the history
This commit refines the levenshtein_distance function implemented in peft_lora_seq2seq_accelerate_ds_zero3_offload.py, reducing its space
complexity from O(n^2) to O(n) by keeping a single row of the DP table
instead of the full matrix. The revised implementation has been thoroughly
tested to confirm that it remains correct and reliable.
The same change is applied to peft_lora_clm_accelerate_ds_zero3_offload.py.
  • Loading branch information
SUNGOD3 authored Mar 7, 2024
1 parent e7e95c0 commit 7e84dec
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import sys
import threading

import numpy as np
import psutil
import torch
from accelerate import Accelerator
Expand All @@ -23,23 +22,23 @@

def levenshtein_distance(str1, str2):
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``str1`` into ``str2``.

    Only one row of the dynamic-programming table is kept in memory.

    Time complexity:  O(len(str1) * len(str2))
    Space complexity: O(len(str2))

    Args:
        str1: Source string.
        str2: Target string.

    Returns:
        The edit distance as a non-negative ``int``.
    """
    # Identical inputs need no edits; skip the table entirely.
    if str1 == str2:
        return 0

    num_rows = len(str1) + 1
    num_cols = len(str2) + 1

    # row[j] starts as the distance from the empty prefix of str1 to
    # str2[:j], i.e. j insertions.
    row = list(range(num_cols))

    for i in range(1, num_rows):
        # `diagonal` carries the value of cell (i - 1, j - 1), which would
        # otherwise be overwritten as the row is updated in place.
        diagonal = row[0]
        row[0] = i  # distance from str1[:i] to the empty string: i deletions
        for j in range(1, num_cols):
            above = row[j]  # cell (i - 1, j), saved before it is overwritten
            if str1[i - 1] == str2[j - 1]:
                # Matching characters: no extra cost over the diagonal cell.
                row[j] = diagonal
            else:
                # Cheapest of substitution, deletion and insertion, plus one.
                row[j] = min(diagonal, above, row[j - 1]) + 1
            diagonal = above

    return row[num_cols - 1]


def get_closest_label(eval_pred, classes):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import sys
import threading

import numpy as np
import psutil
import torch
from accelerate import Accelerator
Expand All @@ -17,23 +16,23 @@

def levenshtein_distance(str1, str2):
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``str1`` into ``str2``.

    Only one row of the dynamic-programming table is kept in memory.

    Time complexity:  O(len(str1) * len(str2))
    Space complexity: O(len(str2))

    Args:
        str1: Source string.
        str2: Target string.

    Returns:
        The edit distance as a non-negative ``int``.
    """
    # Identical inputs need no edits; skip the table entirely.
    if str1 == str2:
        return 0

    num_rows = len(str1) + 1
    num_cols = len(str2) + 1

    # row[j] starts as the distance from the empty prefix of str1 to
    # str2[:j], i.e. j insertions.
    row = list(range(num_cols))

    for i in range(1, num_rows):
        # `diagonal` carries the value of cell (i - 1, j - 1), which would
        # otherwise be overwritten as the row is updated in place.
        diagonal = row[0]
        row[0] = i  # distance from str1[:i] to the empty string: i deletions
        for j in range(1, num_cols):
            above = row[j]  # cell (i - 1, j), saved before it is overwritten
            if str1[i - 1] == str2[j - 1]:
                # Matching characters: no extra cost over the diagonal cell.
                row[j] = diagonal
            else:
                # Cheapest of substitution, deletion and insertion, plus one.
                row[j] = min(diagonal, above, row[j - 1]) + 1
            diagonal = above

    return row[num_cols - 1]


def get_closest_label(eval_pred, classes):
Expand Down

0 comments on commit 7e84dec

Please sign in to comment.