-
Notifications
You must be signed in to change notification settings - Fork 4
/
levenshtein-distance.py
84 lines (77 loc) · 3.13 KB
/
levenshtein-distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Levenshtein Distance
# 🟠 Medium
#
# https://www.algoexpert.io/questions/levenshtein-distance
#
# Tags: Dynamic Programming
import timeit
# Compute the number of edits needed for every pair of prefixes of the
# two input strings. When the last characters of both prefixes match,
# the number of edits is the same as for the two prefixes without those
# characters. When the last characters do not match, the number of edits
# is the smallest of the results obtained before adding the last
# character of either prefix, or of both, plus one for the edit needed
# because the current characters differ.
#
# Time complexity: O(m*n) - Where m and n are the lengths of the input
# strings.
# Space complexity: O(min(m,n)) - Checking which string is longer lets
# us store only one row of extra memory containing the number of
# edits needed when we had one character less of the longer string.
class BottomUpDP:
    """Bottom-up dynamic-programming solver for the Levenshtein distance.

    Only one row of the DP table is kept at a time, and that row is
    indexed by the shorter input, so the extra memory is O(min(m, n))
    while the running time is O(m * n), where m and n are the lengths of
    the two input strings.
    """

    def levenshteinDistance(self, str1: str, str2: str) -> int:
        """Return the edit distance between ``str1`` and ``str2``.

        The distance is the minimum number of single-character
        insertions, deletions and replacements needed to turn one string
        into the other.

        Args:
            str1: First input string.
            str2: Second input string.

        Returns:
            The number of edits; 0 when both strings are equal.
        """
        # The distance is symmetric, so the DP row can be indexed by
        # whichever string is shorter to keep the row as small as
        # possible. On a tie ``str1`` plays the "short" role.
        short, long = (str1, str2) if len(str1) <= len(str2) else (str2, str1)
        m = len(short)
        # Row for the empty prefix of ``long``: turning the first ``i``
        # characters of ``short`` into "" takes ``i`` edits.
        dp = list(range(m + 1))
        # Extend the prefix of ``long`` by one character per iteration;
        # ``j`` is the length of that prefix.
        for j, long_char in enumerate(long, start=1):
            # Column 0 pairs the empty prefix of ``short`` with a prefix
            # of length ``j``, which costs ``j`` edits. The remaining
            # cells are overwritten below.
            nxt = [j] * (m + 1)
            # ``i`` is the length of the current prefix of ``short``.
            for i, short_char in enumerate(short, start=1):
                if short_char == long_char:
                    # Matching last characters add no cost: reuse the
                    # result for both prefixes without them.
                    nxt[i] = dp[i - 1]
                else:
                    # Otherwise take the cheapest of the three
                    # neighbouring sub-problems (drop the last character
                    # of both prefixes, of ``long`` only, or of
                    # ``short`` only) and pay one more edit.
                    nxt[i] = 1 + min(dp[i - 1], dp[i], nxt[i - 1])
            dp = nxt
        # The last cell pairs the full ``short`` with the full ``long``.
        return dp[-1]
def test():
    """Check every solver against a fixed table of cases and print timings.

    For each solver class, all cases are run once while being timed with
    ``timeit.default_timer``; afterwards one coloured summary line with
    the solver name and the elapsed seconds is printed.

    Raises:
        AssertionError: If a solver returns a distance different from the
            expected one. The message contains the actual and expected
            values, the index of the failing case and the solver name.
    """
    solvers = [BottomUpDP]
    # Each case is [str1, str2, expected distance].
    cases = [
        ["", "", 0],
        ["", "abc", 3],
        ["abc", "abc", 0],
        ["abc", "yabd", 2],
        ["table", "bal", 3],
    ]
    # Column layout of the summary line: name, elapsed time, unit.
    row_format = "{0:20}{1:10}{2:10}"
    for solver in solvers:
        began = timeit.default_timer()
        # Single timed pass over the whole case table.
        for _ in range(1):
            for index, (first, second, expected) in enumerate(cases):
                # A fresh solver instance is created for every case.
                actual = solver().levenshteinDistance(first, second)
                assert actual == expected, (
                    f"\033[93m» {actual} <> {expected}\033[91m for"
                    + f" test {index} using \033[1m{solver.__name__}"
                )
        elapsed = timeit.default_timer() - began
        summary = row_format.format(
            solver.__name__, str(round(elapsed, 5)), "seconds"
        )
        print(f"\033[92m» {summary}\033[0m")
# Run the self-checks and print the timing summary whenever this module
# is executed; there is no __main__ guard, so this also runs on import.
test()